diff --git a/kernel/arch/aarch64/sched/sched.cpp b/kernel/arch/aarch64/sched/sched.cpp index 8202e0f9..25df15e4 100644 --- a/kernel/arch/aarch64/sched/sched.cpp +++ b/kernel/arch/aarch64/sched/sched.cpp @@ -61,6 +61,9 @@ __PRIVILEGED_CODE static void save_cpu_context( for (int i = 0; i < 31; i++) { ctx->x[i] = tf->x[i]; } + // ctx->sp tracks the interrupted stack pointer for the return mode in ctx->pstate: + // - EL0t / EL1t: SP_EL0 + // - EL1h: SP_EL1 ctx->sp = tf->sp; ctx->pc = tf->elr; ctx->pstate = tf->spsr; @@ -80,6 +83,29 @@ __PRIVILEGED_CODE static void load_cpu_context( tf->spsr = ctx->pstate; } +/** Stages the selected task's EL1 exception stack top in tf->sp_el1 for the trap-return epilogue. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE static void prepare_trap_return_stacks( + aarch64::trap_frame* tf, const task* next +) { + // trap_frame.sp is consumed as the return stack for the selected mode: + // - EL0t / EL1t -> SP_EL0 + // - EL1h -> SP_EL1 + // + // trap_frame.sp_el1 is consumed by the vector epilogue when returning to + // EL0t/EL1t, establishing the selected task's EL1 exception stack for + // subsequent traps. The store is unconditional; the zero check below is DEBUG-only and exempts EL1h returns. + tf->sp_el1 = next->exec.system_stack_top; + +#ifdef DEBUG + uint64_t mode = tf->spsr & aarch64::SPSR_MODE_MASK; + if (mode != aarch64::SPSR_EL1H && tf->sp_el1 == 0) { + log::fatal("sched/aarch64: missing system_stack_top for non-EL1h return"); + } +#endif +} + /** * @note Privilege: **required** */ @@ -138,6 +164,7 @@ __PRIVILEGED_CODE void on_yield(aarch64::trap_frame* tf) { fpu::restore(&next->exec.fpu_ctx); load_cpu_context(&next->exec.cpu_ctx, tf); + prepare_trap_return_stacks(tf, next); cpu::write_tls_base(next->exec.tls_base); arch_post_switch(next); // Defer prev->on_cpu clear until switch teardown is complete. 
@@ -173,6 +200,7 @@ __PRIVILEGED_CODE void on_tick(aarch64::trap_frame* tf) { fpu::restore(&next->exec.fpu_ctx); load_cpu_context(&next->exec.cpu_ctx, tf); + prepare_trap_return_stacks(tf, next); cpu::write_tls_base(next->exec.tls_base); arch_post_switch(next); // Prevent early off-CPU publication while trap exit still depends on prev context. diff --git a/kernel/arch/aarch64/trap/trap_frame.h b/kernel/arch/aarch64/trap/trap_frame.h index a1117d80..62ddf757 100644 --- a/kernel/arch/aarch64/trap/trap_frame.h +++ b/kernel/arch/aarch64/trap/trap_frame.h @@ -8,13 +8,22 @@ namespace aarch64 { struct alignas(16) trap_frame { uint64_t x[31]; // x0-x30 uint64_t sp; // interrupted-context SP (EL0 traps store SP_EL0) + uint64_t sp_el1; // EL1 exception stack (SP_EL1) the trap epilogue installs when returning to EL0t/EL1t; not read on EL1h returns uint64_t elr; uint64_t spsr; uint64_t esr; uint64_t far; + uint64_t _reserved0; /* reserved slot at 0x128; brings sizeof(trap_frame) to the asserted 0x130 */ }; -static_assert(sizeof(trap_frame) == 0x120); +static_assert(__builtin_offsetof(trap_frame, x) == 0x00); +static_assert(__builtin_offsetof(trap_frame, sp) == 0xF8); +static_assert(__builtin_offsetof(trap_frame, sp_el1) == 0x100); +static_assert(__builtin_offsetof(trap_frame, elr) == 0x108); +static_assert(__builtin_offsetof(trap_frame, spsr) == 0x110); +static_assert(__builtin_offsetof(trap_frame, esr) == 0x118); +static_assert(__builtin_offsetof(trap_frame, far) == 0x120); +static_assert(sizeof(trap_frame) == 0x130); inline uint64_t get_ip(const trap_frame* tf) { return tf->elr; diff --git a/kernel/arch/aarch64/trap/vectors.S b/kernel/arch/aarch64/trap/vectors.S index 04b3d356..5a0a27e6 100644 --- a/kernel/arch/aarch64/trap/vectors.S +++ b/kernel/arch/aarch64/trap/vectors.S @@ -20,15 +20,16 @@ /* trap::trap_frame offsets (bytes) */ .set TF_X0, 0x00 .set TF_SP, 0xF8 -.set TF_ELR, 0x100 -.set TF_SPSR, 0x108 -.set TF_ESR, 0x110 -.set TF_FAR, 0x118 -.set TF_SIZE, 0x120 +.set TF_SP_EL1, 0x100 +.set TF_ELR, 0x108 +.set TF_SPSR, 0x110 +.set TF_ESR, 0x118 +.set TF_FAR, 0x120 +.set TF_SIZE, 0x130 .macro 
STLX_A64_SAVE_GPRS /* ARMv8-A guarantees SP_EL1 is 16-byte aligned on exception entry. - * TF_SIZE (0x120) is 16-byte aligned, so SP remains aligned after subtraction. */ + * TF_SIZE (0x130) is 16-byte aligned, so SP remains aligned after subtraction. */ sub sp, sp, #TF_SIZE stp x0, x1, [sp, #0x00] stp x2, x3, [sp, #0x10] @@ -48,7 +49,7 @@ str x30, [sp, #0xF0] .endm -.macro STLX_A64_RESTORE_GPRS +.macro STLX_A64_RESTORE_GPRS_EXCEPT_X16 ldp x0, x1, [sp, #0x00] ldp x2, x3, [sp, #0x10] ldp x4, x5, [sp, #0x20] @@ -57,7 +58,7 @@ ldp x10, x11, [sp, #0x50] ldp x12, x13, [sp, #0x60] ldp x14, x15, [sp, #0x70] - ldp x16, x17, [sp, #0x80] + ldr x17, [sp, #0x88] /* x17 only: x16 is deliberately left untouched for the caller */ ldp x18, x19, [sp, #0x90] ldp x20, x21, [sp, #0xA0] ldp x22, x23, [sp, #0xB0] @@ -65,7 +66,6 @@ ldp x26, x27, [sp, #0xD0] ldp x28, x29, [sp, #0xE0] ldr x30, [sp, #0xF0] - add sp, sp, #TF_SIZE .endm .macro STLX_A64_CAPTURE_SYSREGS @@ -83,7 +83,7 @@ .macro STLX_A64_WRITEBACK_SYSREGS /* Use x16 (IP0, caller-saved scratch) as temporary to avoid clobbering x0. - * x16 will be restored from trap_frame at offset 0x80 in RESTORE_GPRS. */ + * x16 will be restored from trap_frame at offset 0x80 in epilogue. */ ldr x16, [sp, #TF_ELR] msr elr_el1, x16 ldr x16, [sp, #TF_SPSR] @@ -92,12 +92,61 @@ .macro STLX_A64_CALL_C handler /* SP is 16-byte aligned: ARMv8-A guarantees SP_EL1 alignment on exception entry, - * and TF_SIZE (0x120 = 288 = 16×18) preserves alignment. AArch64 PCS requires + * and TF_SIZE (0x130 = 304 = 16×19) preserves alignment. AArch64 PCS requires * 16-byte alignment at function call boundaries. 
*/ mov x0, sp /* arg0 = trap_frame* */ bl \handler .endm +/* + * Mode-aware trap epilogue: + * - Return to EL1h (SPSR.M[0]=1): restore SP_EL1 from trap_frame.sp + * - Return to EL0t/EL1t (SPSR.M[0]=0): + * * restore SP_EL0 from trap_frame.sp + * * switch active EL1 stack to trap_frame.sp_el1 + * + * This is required when the scheduler switches away from an in-kernel blocked + * context: the outgoing task's kernel call chain must not remain as the + * active SP_EL1 for unrelated tasks. + */ +.macro STLX_A64_RETURN_FROM_FRAME + ldr x17, [sp, #TF_SPSR] + tst x17, #1 /* SPSR.M[0]: 1 = EL1h, 0 = EL0t/EL1t */ + b.ne 1f + + /* Return to EL0t/EL1t: publish the selected task's SP_EL0. */ + ldr x17, [sp, #TF_SP] + msr sp_el0, x17 + + STLX_A64_WRITEBACK_SYSREGS + isb + mov x16, sp /* x16 = frame base; sp is replaced below */ + STLX_A64_RESTORE_GPRS_EXCEPT_X16 + + /* Install the selected task's EL1 exception stack (trap_frame.sp_el1) for the next trap. */ + ldr x17, [x16, #TF_SP_EL1] + mov sp, x17 + + /* Restore scratch regs last (x16 held frame base). */ + ldr x17, [x16, #0x88] + ldr x16, [x16, #0x80] + eret + +1: + /* Return to EL1h: resume at exact interrupted kernel SP. */ + STLX_A64_WRITEBACK_SYSREGS + isb + mov x16, sp /* x16 = frame base; sp is replaced below */ + STLX_A64_RESTORE_GPRS_EXCEPT_X16 + ldr x17, [x16, #TF_SP] + mov sp, x17 + + /* Restore scratch regs last (x16 held frame base). */ + ldr x17, [x16, #0x88] + ldr x16, [x16, #0x80] + eret +.endm + /* EL1-origin prologue/epilogue */ .macro STLX_A64_EL1_ENTRY handler STLX_A64_SAVE_GPRS @@ -106,25 +155,11 @@ /* Use x16 (IP0, caller-saved scratch) as temporary. x16 already saved above. */ add x16, sp, #TF_SIZE str x16, [sp, #TF_SP] + str x16, [sp, #TF_SP_EL1] /* default sp_el1 = sp + TF_SIZE, the same value stored to trap_frame.sp here */ STLX_A64_CAPTURE_SYSREGS STLX_A64_CALL_C \handler - - /* If returning to a mode that uses SP_EL0 (EL0t/EL1t), honor trap_frame.sp. - * This is required when the scheduler switches tasks from an EL1h context: - * load_cpu_context() updates trap_frame.sp for the selected task, and we - * must publish it to SP_EL0 before eret. 
*/ - ldr x16, [sp, #TF_SPSR] - tst x16, #1 - b.ne 1f - ldr x16, [sp, #TF_SP] - msr sp_el0, x16 -1: - - STLX_A64_WRITEBACK_SYSREGS - isb - STLX_A64_RESTORE_GPRS - eret + STLX_A64_RETURN_FROM_FRAME .endm /* EL0-origin prologue/epilogue */ @@ -135,19 +170,12 @@ /* Use x16 (IP0, caller-saved scratch) as temporary. x16 already saved above. */ mrs x16, sp_el0 str x16, [sp, #TF_SP] + add x16, sp, #TF_SIZE + str x16, [sp, #TF_SP_EL1] /* default sp_el1 = sp + TF_SIZE */ STLX_A64_CAPTURE_SYSREGS STLX_A64_CALL_C \handler - - /* Allow handler to change return SP_EL0 via trap_frame.sp */ - /* Use x16 (IP0, caller-saved scratch) as temporary. x16 will be restored below. */ - ldr x16, [sp, #TF_SP] - msr sp_el0, x16 - - STLX_A64_WRITEBACK_SYSREGS - isb - STLX_A64_RESTORE_GPRS - eret + STLX_A64_RETURN_FROM_FRAME .endm /* @@ -175,19 +203,12 @@ /* Use x16 (IP0, caller-saved scratch) as temporary. x16 already saved above. */ mrs x16, sp_el0 str x16, [sp, #TF_SP] + add x16, sp, #TF_SIZE + str x16, [sp, #TF_SP_EL1] /* default sp_el1 = sp + TF_SIZE */ STLX_A64_CAPTURE_SYSREGS STLX_A64_CALL_C \handler - - /* Allow handler to change return SP_EL0 via trap_frame.sp */ - /* Use x16 (IP0, caller-saved scratch) as temporary. x16 will be restored below. */ - ldr x16, [sp, #TF_SP] - msr sp_el0, x16 - - STLX_A64_WRITEBACK_SYSREGS - isb - STLX_A64_RESTORE_GPRS - eret + STLX_A64_RETURN_FROM_FRAME .endm /* 2KB-aligned vector table */ diff --git a/kernel/sched/sched_internal.h b/kernel/sched/sched_internal.h index fd0b438d..9408db8a 100644 --- a/kernel/sched/sched_internal.h +++ b/kernel/sched/sched_internal.h @@ -14,7 +14,8 @@ __PRIVILEGED_CODE void arch_init_task_context( /** * Arch-specific: called after picking the next task, before returning - * to trap exit. Updates TSS.RSP0 on x86 (no-op on aarch64). + * to trap exit. Updates architecture-specific post-switch state + * (e.g. TSS.RSP0 on x86, translation roots on aarch64). * @note Privilege: **required** */ __PRIVILEGED_CODE void arch_post_switch(task* next);