// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
// This source file is part of the Cangjie project, licensed under Apache-2.0
// with Runtime Library Exception.
//
// See https://cangjie-lang.cn/pages/LICENSE for license information.
// Thin preprocessor wrappers around the GNU assembler CFI directives so that
// unwind annotations can be written in function-call form next to the
// instruction they describe.
#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#define cfi_restore(reg) .cfi_restore reg
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
// CJThread structure offsets for ARM32
// CJTHREAD_THREAD_OFFSET:  field loaded as the "thread" pointer of a cjthread handle.
// CJTHREAD_CONTEXT_OFFSET: start of the register context area inside the cjthread.
// NOTE(review): presumably these must stay in sync with the runtime's native
// structure layout - confirm against the corresponding C/C++ definitions.
#define CJTHREAD_THREAD_OFFSET 0x08
#define CJTHREAD_CONTEXT_OFFSET 0x10
// CJThreadContext structure offsets for ARM32
// Core registers occupy consecutive 4-byte slots (r4-r10, r11/fp, r13/sp,
// r14/lr, r15/pc); r12 has no slot.
#define CONTEXT_R4 0x00
#define CONTEXT_R5 0x04
#define CONTEXT_R6 0x08
#define CONTEXT_R7 0x0c
#define CONTEXT_R8 0x10
#define CONTEXT_R9 0x14
#define CONTEXT_R10 0x18
#define CONTEXT_R11FP 0x1c
#define CONTEXT_R13SP 0x20
#define CONTEXT_R14LR 0x24
#define CONTEXT_R15PC 0x28
// VFP registers d8-d15 occupy consecutive 8-byte slots starting at 0x2c,
// followed by a 4-byte slot for FPSCR.
#define CONTEXT_ARM32_D8 0x2c
#define CONTEXT_ARM32_D9 0x34
#define CONTEXT_ARM32_D10 0x3c
#define CONTEXT_ARM32_D11 0x44
#define CONTEXT_ARM32_D12 0x4c
#define CONTEXT_ARM32_D13 0x54
#define CONTEXT_ARM32_D14 0x5c
#define CONTEXT_ARM32_D15 0x64
#define CONTEXT_FPSCR 0x6c
// Offset of the processor field inside the thread structure
// (read by CJ_MCC_ExclusiveScope to remember the old processor).
#define THREAD_PROCESSOR_OFFSET 0x1c
// Number of stack bytes reserved by CJ_MCC_ExclusiveScope for its frame (0x90).
#define ExclusiveScopeFrameSize 144
// ==============================================================================
// CJ_MCC_ExclusiveScope: Switch from cjthread to OS thread for exclusive execution
// ==============================================================================
//
// void* CJ_MCC_ExclusiveScope(void* executeClosure, void* closurePtr)
//                             r0                    r1
//
// Target: ARM32, GNU assembler syntax, preprocessed with the C preprocessor.
//
// Steps:
// 1. Look up the current cjthread handle, its thread and the thread's processor.
// 2. Create exclusive cjthread (MCC_NewExclusiveCJThread).
// 3. Record the C2N context and enter the safe region.
// 4. Save the register context into the old cjthread and, unless the old
//    cjthread is already exclusive, switch sp to the thread context stack.
// 5. Execute closure (ExclusiveExecutor).
// 6. Switch back to cjthread (ExclusiveRestore), leave the safe region if it
//    was entered here, and delete the C2N context.
//
// Returns: r0 = 0 on every path in this function, including the early exits
//          taken when no cjthread/thread is available or creation fails.
//
// Register roles inside the body (all are restored from the frame on exit):
//   r4  = executeClosure        r5  = closurePtr
//   r6  = oldCJThread           r7  = thread
//   r8  = address of oldCJThread + CJTHREAD_CONTEXT_OFFSET
//   r9  = pointer returned by ExclusiveGetThreadContext
//   r10 = value returned by MRT_GetThreadLocalData
//   r11 = frame pointer (equals the frame base for the whole function)
//
// Frame layout (ExclusiveScopeFrameSize = 144 = 0x90):
// 0x00 r11 (fp)
// 0x04 lr
// 0x08 N2CSlotData: pc (reserved for MRT_SaveC2NContext)
// 0x0c N2CSlotData: fa (reserved for MRT_SaveC2NContext)
// 0x10 N2CSlotData: status (reserved for MRT_SaveC2NContext)
// 0x14 r4 (callee-saved)
// 0x18 r5 (callee-saved)
// 0x1c r6 (callee-saved)
// 0x20 r7 (callee-saved)
// 0x24 r8 (callee-saved)
// 0x28 r9 (callee-saved)
// 0x2c r10 (callee-saved)
// 0x30 d8
// 0x38 d9
// 0x40 d10
// 0x48 d11
// 0x50 d12
// 0x58 d13
// 0x60 d14
// 0x68 d15
// 0x70 newCJThread
// 0x74 oldCJThread
// 0x78 thread
// 0x7c oldProcessor
// 0x80 enterSafe
// 0x84 saved sp
// 0x88-0x8f not referenced by this function
.text
.align 2
.global CJ_MCC_ExclusiveScope
.type CJ_MCC_ExclusiveScope, %function
CJ_MCC_ExclusiveScope:
    .cfi_startproc
    // Allocate the frame; the CFA annotation follows the sp adjustment directly
    // so the unwind info is accurate for the two stores below as well.
    sub sp, sp, #ExclusiveScopeFrameSize
    cfi_adjust_cfa_offset(ExclusiveScopeFrameSize)
    str r11, [sp]
    cfi_rel_offset(r11, 0)
    str lr, [sp, #0x04]
    cfi_rel_offset(lr, 4)
    mov r11, sp
    cfi_def_cfa_register(r11)
    // Save callee-saved registers (starts at 0x14, leaving 0x08-0x13 for N2CSlotData)
    str r4, [r11, #0x14]
    cfi_rel_offset(r4, 0x14)
    str r5, [r11, #0x18]
    cfi_rel_offset(r5, 0x18)
    str r6, [r11, #0x1c]
    cfi_rel_offset(r6, 0x1c)
    str r7, [r11, #0x20]
    cfi_rel_offset(r7, 0x20)
    str r8, [r11, #0x24]
    cfi_rel_offset(r8, 0x24)
    str r9, [r11, #0x28]
    cfi_rel_offset(r9, 0x28)
    str r10, [r11, #0x2c]
    cfi_rel_offset(r10, 0x2c)
    // Save VFP callee-saved registers (no CFI is emitted for these)
    vstr d8, [r11, #0x30]
    vstr d9, [r11, #0x38]
    vstr d10, [r11, #0x40]
    vstr d11, [r11, #0x48]
    vstr d12, [r11, #0x50]
    vstr d13, [r11, #0x58]
    vstr d14, [r11, #0x60]
    vstr d15, [r11, #0x68]
    // Keep the two arguments in callee-saved registers across the calls below
    mov r4, r0 // executeClosure
    mov r5, r1 // closurePtr
    // get oldCJThread, thread, oldProcessor
    bl CJ_CJThreadGetHandle
    cmp r0, #0
    beq .L_exc_no_cjthread
    str r0, [r11, #0x74] // oldCJThread
    ldr r1, [r0, #CJTHREAD_THREAD_OFFSET]
    cmp r1, #0
    beq .L_exc_no_thread
    str r1, [r11, #0x78] // thread
    // Save oldProcessor before it is modified by ExclusiveExecutor
    // thread->processor is at offset THREAD_PROCESSOR_OFFSET (0x1c)
    ldr r2, [r1, #THREAD_PROCESSOR_OFFSET]
    str r2, [r11, #0x7c] // oldProcessor
    // Create new exclusive cjthread
    mov r0, r4 // executeClosure
    mov r1, r5 // closurePtr
    mov r2, #0 // futureTi = 0
    bl MCC_NewExclusiveCJThread
    cmp r0, #0
    beq .L_exc_create_failed
    str r0, [r11, #0x70] // newCJThread
    // Save C2N context (unwindPCForExclusiveStub must be before MRT_SaveC2NContext)
    // First get ThreadLocalData
    bl MRT_GetThreadLocalData
    mov r10, r0 // save TLD in r10
.global unwindPCForExclusiveStub
unwindPCForExclusiveStub:
    // MRT_SaveC2NContext(address of this label, frame pointer, TLD)
    adr r0, unwindPCForExclusiveStub
    mov r1, r11
    mov r2, r10
    bl MRT_SaveC2NContext
    mov r0, #0
    bl MRT_EnterSaferegion
    str r0, [r11, #0x80] // enterSafe: result decides whether to leave later
    str sp, [r11, #0x84] // saved sp (still equal to r11 at this point)
    // Save cjthread context
    // Note: r6-r8 already hold the working values loaded here, not the
    // values the caller passed in; the caller values live in the frame.
    ldr r6, [r11, #0x74] // oldCJThread
    ldr r7, [r11, #0x78] // thread
    add r8, r6, #CJTHREAD_CONTEXT_OFFSET
    str r4, [r8, #CONTEXT_R4]
    str r5, [r8, #CONTEXT_R5]
    str r6, [r8, #CONTEXT_R6]
    str r7, [r8, #CONTEXT_R7]
    str r8, [r8, #CONTEXT_R8]
    str r9, [r8, #CONTEXT_R9]
    str r10, [r8, #CONTEXT_R10]
    str r11, [r8, #CONTEXT_R11FP]
    str sp, [r8, #CONTEXT_R13SP]
    str lr, [r8, #CONTEXT_R14LR]
    // The saved pc points at the cleanup entry below
    adr r0, .L_exc_return_point
    str r0, [r8, #CONTEXT_R15PC]
    vstr d8, [r8, #CONTEXT_ARM32_D8]
    vstr d9, [r8, #CONTEXT_ARM32_D9]
    vstr d10, [r8, #CONTEXT_ARM32_D10]
    vstr d11, [r8, #CONTEXT_ARM32_D11]
    vstr d12, [r8, #CONTEXT_ARM32_D12]
    vstr d13, [r8, #CONTEXT_ARM32_D13]
    vstr d14, [r8, #CONTEXT_ARM32_D14]
    vstr d15, [r8, #CONTEXT_ARM32_D15]
    vmrs r0, fpscr
    str r0, [r8, #CONTEXT_FPSCR]
    // Nested exclusive scope is already on OS thread stack.
    // Switching sp again to thread->context.sp may clobber current frames.
    ldr r0, [r11, #0x74] // oldCJThread
    bl IsExclusiveCJThread
    cmp r0, #0
    bne .L_exc_skip_stack_switch
    // Switch to OS thread
    mov r0, r7 // arg0 = thread pointer
    bl ExclusiveGetThreadContext // returns void* = &thread->context
    cmp r0, #0
    beq .L_exc_skip_stack_switch
    mov r9, r0 // r9 = &thread->context
    ldr r0, [r9, #CONTEXT_R13SP]
    // Validate saved stack pointer before switching.
    // If invalid (NULL / high-bit set / too small), skip stack switch.
    // NOTE(review): the high-bit test rejects every stack address at or above
    // 0x80000000 - confirm this holds for all supported address-space layouts.
    cmp r0, #0
    beq .L_exc_skip_stack_switch
    tst r0, #0x80000000
    bne .L_exc_skip_stack_switch
    // Bit 31 is known to be clear here, so the signed comparison below gives
    // the same result as an unsigned one.
    cmp r0, #0x1000
    blt .L_exc_skip_stack_switch
    mov sp, r0
    ldr r0, [r9, #CONTEXT_FPSCR]
    vmsr fpscr, r0
    // Leave 64 bytes of headroom and force 8-byte alignment of the new sp
    sub sp, sp, #64
    bic sp, sp, #0x7
    // Execute closure
    ldr r0, [r11, #0x78] // thread
    ldr r1, [r11, #0x70] // newCJThread
    bl ExclusiveExecutor
    b .L_exc_after_executor
.L_exc_skip_stack_switch:
    // Already on OS thread stack, execute directly without switching sp.
    ldr r0, [r11, #0x78] // thread
    ldr r1, [r11, #0x70] // newCJThread
    bl ExclusiveExecutor
.L_exc_after_executor:
    // Return sp to the frame of this function using the value saved at 0x84
    ldr r0, [r11, #0x84] // saved sp
    mov sp, r0
    // Call ExclusiveRestore to complete the restoration
    // ExclusiveRestore(oldCJThread, thread, newCJThread, oldProcessor)
    //                  r0           r1      r2           r3
    ldr r0, [r11, #0x74] // oldCJThread
    ldr r1, [r11, #0x78] // thread
    ldr r2, [r11, #0x70] // newCJThread
    ldr r3, [r11, #0x7c] // oldProcessor
    bl ExclusiveRestore
    // After ExclusiveRestore, continue cleanup
.L_exc_return_point:
    // Reached by fall-through; this address is also the pc stored in the old
    // cjthread context above (presumably used when that context is resumed).
    // Only r11 is relied upon here; sp is re-derived from the frame.
    ldr r0, [r11, #0x84]
    mov sp, r0
    ldr r0, [r11, #0x80] // enterSafe
    cmp r0, #0
    beq .L_exc_skip_leave
    bl MRT_LeaveSaferegion
    bl MRT_GetThreadLocalData
    mov r4, r0
    b .L_exc_do_delete
.L_exc_skip_leave:
    bl MRT_GetThreadLocalData
    mov r4, r0
.L_exc_do_delete:
    mov r0, r4
    bl MRT_DeleteC2NContext
    mov r0, #0
    b .L_exc_cleanup
    // Early exits: nothing has been switched yet, just return 0
.L_exc_create_failed:
.L_exc_no_cjthread:
.L_exc_no_thread:
    mov r0, #0
.L_exc_cleanup:
    // Restore VFP callee-saved registers
    vldr d8, [r11, #0x30]
    vldr d9, [r11, #0x38]
    vldr d10, [r11, #0x40]
    vldr d11, [r11, #0x48]
    vldr d12, [r11, #0x50]
    vldr d13, [r11, #0x58]
    vldr d14, [r11, #0x60]
    vldr d15, [r11, #0x68]
    // Restore callee-saved registers
    ldr r4, [r11, #0x14]
    cfi_restore(r4)
    ldr r5, [r11, #0x18]
    cfi_restore(r5)
    ldr r6, [r11, #0x1c]
    cfi_restore(r6)
    ldr r7, [r11, #0x20]
    cfi_restore(r7)
    ldr r8, [r11, #0x24]
    cfi_restore(r8)
    ldr r9, [r11, #0x28]
    cfi_restore(r9)
    ldr r10, [r11, #0x2c]
    cfi_restore(r10)
    // sp equals r11 on every path that reaches this point, so rebase the CFA
    // on sp before r11 is overwritten; otherwise the unwind info would compute
    // the CFA from the reloaded caller value of r11.
    cfi_def_cfa_register(sp)
    ldr r11, [sp]
    cfi_restore(r11)
    ldr lr, [sp, #0x04]
    cfi_restore(lr)
    add sp, sp, #ExclusiveScopeFrameSize
    cfi_adjust_cfa_offset(-ExclusiveScopeFrameSize)
    bx lr
    .cfi_endproc
.size CJ_MCC_ExclusiveScope, .-CJ_MCC_ExclusiveScope
// ==============================================================================
// ExecuteExclusiveCangjieStub: Execute Cangjie closure with new ThreadLocalData
// ==============================================================================
//
// void* ExecuteExclusiveCangjieStub(void* sret, void* arg1, void* closureObj,
//                                   void* executeClosure, void* threadData)
//   r0 = sret, r1 = arg1, r2 = closureObj, r3 = executeClosure,
//   [sp+0] = threadData (not read by this stub)
//
// Target: ARM32, GNU assembler syntax, preprocessed with the C preprocessor.
//
// Execute closure: load the function pointer from executeClosure+8 and call it
// with r0 = sret, r1 = arg1, r2 = closureObj, r3 = arg1.
// The call is bracketed by MRT_SaveTopManagedContextToN2CStub and
// MRT_RestoreTopManagedContextFromN2CStub, both of which receive the frame
// pointer. The value returned by the closure is returned unchanged in r0.
//
// Frame layout (StubFrameSize = 64 = 0x40):
// 0x00 r11 (fp)
// 0x04 lr
// 0x08 N2CSlotData: pc (reserved for MRT_SaveTopManagedContextToN2CStub)
// 0x0c N2CSlotData: fa (reserved for MRT_SaveTopManagedContextToN2CStub)
// 0x10 N2CSlotData: status (reserved for MRT_SaveTopManagedContextToN2CStub)
// 0x14 r4 (callee-saved)
// 0x18 r5 (callee-saved)
// 0x1c r6 (callee-saved)
// 0x20 r10 (callee-saved)
// 0x24 sret (r0)
// 0x28 arg1 (r1)
// 0x2c closureObj (r2)
// 0x30 executeClosure (r3)
// 0x34 original sp (for aligned stack restore)
// 0x38-0x3f not referenced by this function
#define StubFrameSize 64
.text
.align 2
.global ExecuteExclusiveCangjieStub
.type ExecuteExclusiveCangjieStub, %function
ExecuteExclusiveCangjieStub:
    .cfi_startproc
    // Ensure 16-byte stack alignment before calling closure.
    // r12 keeps the entry sp so the epilogue can undo the optional padding.
    // Assumes sp is 8-byte aligned on entry, so an 8-byte pad is sufficient.
    // Known limitation: the CFA offset below does not include the optional
    // 8-byte pad, so unwind info inside the body is exact only when no pad
    // was needed.
    mov r12, sp
    tst sp, #0x0f
    it ne
    subne sp, sp, #8
    sub sp, sp, #StubFrameSize
    cfi_adjust_cfa_offset(StubFrameSize)
    str r11, [sp]
    cfi_rel_offset(r11, 0)
    str lr, [sp, #0x04]
    cfi_rel_offset(lr, 4)
    mov r11, sp
    cfi_def_cfa_register(r11)
    str r4, [r11, #0x14]
    cfi_rel_offset(r4, 0x14)
    str r5, [r11, #0x18]
    cfi_rel_offset(r5, 0x18)
    str r6, [r11, #0x1c]
    cfi_rel_offset(r6, 0x1c)
    str r10, [r11, #0x20]
    cfi_rel_offset(r10, 0x20)
    // Save call arguments across runtime calls
    str r0, [r11, #0x24] // sret
    str r1, [r11, #0x28] // arg1
    str r2, [r11, #0x2c] // closureObj
    str r3, [r11, #0x30] // executeClosure
    str r12, [r11, #0x34] // original sp before alignment
    // Save context for N2C
    mov r0, r11
    bl MRT_SaveTopManagedContextToN2CStub
    // Reload call arguments (may be updated by GC)
    ldr r4, [r11, #0x24] // sret
    ldr r5, [r11, #0x28] // arg1
    ldr r6, [r11, #0x2c] // closureObj
    ldr r3, [r11, #0x30] // executeClosure (must be in r3)
    // Call closure: func(sret, arg1, closureObj, arg1)
    mov r0, r4 // arg0 = sret
    mov r1, r5 // arg1 = arg1
    mov r2, r6 // arg2 = closureObj
    ldr r12, [r3, #8] // load function pointer from executeClosure+8
    mov r3, r5 // arg3 = arg1 (replaces executeClosure once the target is loaded)
    blx r12
    // This exported label marks the return address of the closure call.
.global unwindPCForExclusiveStubFull
unwindPCForExclusiveStubFull:
    mov r4, r0 // save return value
    mov r0, r11
    bl MRT_RestoreTopManagedContextFromN2CStub
    mov r0, r4 // restore return value
    ldr r4, [r11, #0x14]
    cfi_restore(r4)
    ldr r5, [r11, #0x18]
    cfi_restore(r5)
    ldr r6, [r11, #0x1c]
    cfi_restore(r6)
    ldr r10, [r11, #0x20]
    cfi_restore(r10)
    ldr r12, [r11, #0x34] // entry sp recorded by the prologue
    // sp still equals r11 here; rebase the CFA on sp before r11 is reloaded so
    // the unwind info never depends on the caller value of r11.
    cfi_def_cfa_register(sp)
    ldr r11, [sp]
    cfi_restore(r11)
    ldr lr, [sp, #0x04]
    cfi_restore(lr)
    // Drop the frame and any alignment pad in one step; sp is now the entry sp,
    // so the CFA offset goes back to zero.
    mov sp, r12
    cfi_adjust_cfa_offset(-StubFrameSize)
    bx lr
    .cfi_endproc
.size ExecuteExclusiveCangjieStub, .-ExecuteExclusiveCangjieStub