From cf4debc3bbe578febcec554cb48631fd2ebf9ea6 Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 16 Jun 2026 16:56:59 +0800 Subject: [PATCH 1/4] fix: Support LA signal context; Fix order of `execve` params. --- os/src/arch/loongarch64/mod.rs | 5 +- os/src/arch/loongarch64/trap.rs | 193 ++++++++++++++++++++++++++++++- os/src/arch/riscv/mod.rs | 5 +- os/src/arch/riscv/trap.rs | 195 +++++++++++++++++++++++++++++++- os/src/hal/mod.rs | 2 + os/src/hal/traits.rs | 27 +++++ os/src/signal/action.rs | 119 ++++++------------- os/src/signal/mod.rs | 41 +++---- os/src/sync/futex.rs | 13 ++- os/src/syscall/mman.rs | 6 +- os/src/syscall/mod.rs | 10 +- os/src/syscall/process.rs | 17 +-- os/src/syscall/signal.rs | 61 +++------- os/src/syscall/sync.rs | 26 +++-- os/src/task/mod.rs | 4 +- os/src/trap/context.rs | 14 ++- os/src/trap/mod.rs | 105 +++++++++++------ 17 files changed, 617 insertions(+), 226 deletions(-) diff --git a/os/src/arch/loongarch64/mod.rs b/os/src/arch/loongarch64/mod.rs index 5a09840b..0325e589 100644 --- a/os/src/arch/loongarch64/mod.rs +++ b/os/src/arch/loongarch64/mod.rs @@ -9,4 +9,7 @@ pub mod trap; pub use hart::{read_time, set_timer_deadline, LoongArchHartId}; pub use paging::LoongArchPaging; -pub use trap::{LoongArchInterruptControl, LoongArchTrapContextAbi, LoongArchTrapMachine}; +pub use trap::{ + LoongArchInterruptControl, LoongArchSignalAbi, LoongArchSyscallAbi, LoongArchTrapContextAbi, + LoongArchTrapMachine, +}; diff --git a/os/src/arch/loongarch64/trap.rs b/os/src/arch/loongarch64/trap.rs index c7d1288e..40d31abd 100644 --- a/os/src/arch/loongarch64/trap.rs +++ b/os/src/arch/loongarch64/trap.rs @@ -3,7 +3,13 @@ use core::arch::{asm, global_asm}; use crate::config::TRAMPOLINE; -use crate::hal::traits::{InterruptControl, TrapCause, TrapContextAbi, TrapInfo, TrapMachine}; +use crate::hal::traits::{ + CloneArgs, InterruptControl, NamedReg, SyscallAbi, TrapCause, TrapContextAbi, TrapInfo, + TrapMachine, +}; +use crate::signal::{SignalAbi, SignalAction, SignalBit, SigSetT, StackT}; +use crate::syscall::Pod; +use crate::trap::TrapContext; global_asm!(include_str!("trap.S")); @@ -50,6 +56,12 @@ pub struct LoongArchTrapMachine; /// LoongArch64 register-layout helpers for the common trap context. pub struct LoongArchTrapContextAbi; +/// LoongArch64 Linux signal ABI implementation. +pub struct LoongArchSignalAbi; + +/// LoongArch64 legacy Linux syscall ABI implementation. +pub struct LoongArchSyscallAbi; + /// LoongArch64 trap frame layout shared with `trap.S`. #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -65,6 +77,66 @@ pub struct LoongArchTrapContextFrame { pub fcsr: usize, } +/// LoongArch musl raw `rt_sigaction` syscall layout: +/// handler, flags, and the low 64 bits of sigset_t. +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct LoongArchUserSigAction { + pub handler: usize, + pub sa_flags: usize, + pub sa_mask: u64, +} + +impl Pod for LoongArchUserSigAction {} + +impl SyscallAbi for LoongArchSyscallAbi { + fn decode_clone_args(args: [usize; 6]) -> CloneArgs { + // Linux LoongArch does not select CONFIG_CLONE_BACKWARDS: + // clone(flags, stack, parent_tidptr, child_tidptr, tls). + CloneArgs { + flags: args[0], + stack: args[1], + parent_tid: args[2], + tls: args[4], + child_tid: args[3], + } + } +} + +/// LoongArch musl `mcontext_t`. +/// +/// Verified against the local LoongArch musl headers: +/// size = 272, align = 16, pc at 0, gregs at 8, flags at 264. +#[repr(C, align(16))] +#[derive(Debug, Clone, Copy)] +pub struct LoongArchMContext { + pub pc: usize, + pub gregs: [usize; 32], + pub flags: u32, + pub _pad: u32, +} + +impl Pod for LoongArchMContext {} + +const _: [(); 272] = [(); core::mem::size_of::()]; +const _: [(); 16] = [(); core::mem::align_of::()]; + +#[repr(C, align(16))] +#[derive(Debug, Clone, Copy)] +pub struct LoongArchUContext { + pub uc_flags: usize, + pub uc_link: usize, + pub uc_stack: StackT, + pub uc_sigmask: SigSetT, + pub uc_pad: isize, + pub uc_mcontext: LoongArchMContext, +} + +impl Pod for LoongArchUContext {} + +const _: [(); 448] = [(); core::mem::size_of::()]; +const _: [(); 16] = [(); core::mem::align_of::()]; + /// 用户态 `rt_sigreturn` trampoline 机器码。 /// /// 等价指令序列: @@ -201,6 +273,89 @@ impl TrapMachine for LoongArchTrapMachine { } } +impl SignalAbi for LoongArchSignalAbi { + type UserSigAction = LoongArchUserSigAction; + type UContext = LoongArchUContext; + + fn decode_user_sigaction(action: Self::UserSigAction) -> SignalAction { + SignalAction { + handler: action.handler, + sa_flags: action.sa_flags as u32, + sa_restorer: 0, + sa_mask: SignalBit::from_user_bits(action.sa_mask).bits(), + } + } + + fn encode_user_sigaction(action: SignalAction) -> Self::UserSigAction { + Self::UserSigAction { + handler: action.handler, + sa_flags: action.sa_flags as usize, + sa_mask: SignalBit::from_bits(action.sa_mask) + .unwrap_or(SignalBit::empty()) + .user_bits(), + } + } + + fn user_sigaction_parts(action: &Self::UserSigAction) -> (usize, usize, usize, u64) { + (action.handler, action.sa_flags, 0, action.sa_mask) + } + + fn build_ucontext(trap_cx: &TrapContext, old_mask: u64) -> Self::UContext { + let mut gregs = [0usize; 32]; + for (idx, reg) in gregs.iter_mut().enumerate() { + *reg = trap_cx.reg(idx); + } + gregs[0] = 0; + + Self::UContext { + uc_flags: 0, + uc_link: 0, + uc_stack: StackT { + ss_sp: 0, + ss_flags: 0, + ss_size: 0, + }, + uc_sigmask: SigSetT::from_signal_bits(old_mask), + uc_pad: 0, + uc_mcontext: LoongArchMContext { + pc: trap_cx.user_pc(), + gregs, + flags: 0, + _pad: 0, + }, + } + } + + fn signal_mask(ucontext: &Self::UContext) -> u64 { + ucontext.uc_sigmask.low_bits() + } + + fn restore_ucontext(ucontext: &Self::UContext, trap_cx: &mut TrapContext) { + trap_cx.set_user_pc(ucontext.uc_mcontext.pc); + for idx in 1..32 { + trap_cx.set_reg(idx, ucontext.uc_mcontext.gregs[idx]); + } + } + + fn saved_pc(ucontext: &Self::UContext) -> usize { + ucontext.uc_mcontext.pc + } + + fn set_saved_pc(ucontext: &mut Self::UContext, pc: usize) { + ucontext.uc_mcontext.pc = pc; + } + + fn saved_arg0(ucontext: &Self::UContext) -> usize { + let index = ::signal_gpr_arg0_index(); + ucontext.uc_mcontext.gregs[index] + } + + fn set_saved_arg0(ucontext: &mut Self::UContext, value: usize) { + let index = ::signal_gpr_arg0_index(); + ucontext.uc_mcontext.gregs[index] = value; + } +} + impl TrapContextAbi for LoongArchTrapContextAbi { type Frame = LoongArchTrapContextFrame; @@ -323,6 +478,42 @@ impl TrapContextAbi for LoongArchTrapContextAbi { frame.f.copy_from_slice(fpregs); frame.fcsr = fcsr as usize; } + + fn fault_dump_summary(frame: &Self::Frame) -> [NamedReg; 7] { + [ + NamedReg { name: "ra", value: frame.r[1] }, + NamedReg { name: "sp", value: frame.r[3] }, + NamedReg { name: "fp", value: frame.r[22] }, + NamedReg { name: "tp", value: frame.r[2] }, + NamedReg { name: "a0", value: frame.r[4] }, + NamedReg { name: "a1", value: frame.r[5] }, + NamedReg { name: "a7", value: frame.r[11] }, + ] + } + + fn fault_dump_detail(frame: &Self::Frame) -> [NamedReg; 19] { + [ + NamedReg { name: "a2", value: frame.r[6] }, + NamedReg { name: "a3", value: frame.r[7] }, + NamedReg { name: "a4", value: frame.r[8] }, + NamedReg { name: "a5", value: frame.r[9] }, + NamedReg { name: "a6", value: frame.r[10] }, + NamedReg { name: "t0", value: frame.r[12] }, + NamedReg { name: "t1", value: frame.r[13] }, + NamedReg { name: "t2", value: frame.r[14] }, + NamedReg { name: "t3", value: frame.r[15] }, + NamedReg { name: "t4", value: frame.r[16] }, + NamedReg { name: "t5", value: frame.r[17] }, + NamedReg { name: "t6", value: frame.r[18] }, + NamedReg { name: "t7", value: frame.r[19] }, + NamedReg { name: "t8", value: frame.r[20] }, + NamedReg { name: "u0", value: frame.r[21] }, + NamedReg { name: "s0", value: frame.r[23] }, + NamedReg { name: "s1", value: frame.r[24] }, + NamedReg { name: "s2", value: frame.r[25] }, + NamedReg { name: "s3", value: frame.r[26] }, + ] + } } #[inline] diff --git a/os/src/arch/riscv/mod.rs b/os/src/arch/riscv/mod.rs index 65213789..4c1fc2a0 100644 --- a/os/src/arch/riscv/mod.rs +++ b/os/src/arch/riscv/mod.rs @@ -10,4 +10,7 @@ pub mod trap; pub use hart::RiscvHartId; pub use paging::Sv39Paging; -pub use trap::{RiscvInterruptControl, RiscvTrapContextAbi, RiscvTrapMachine}; +pub use trap::{ + RiscvInterruptControl, RiscvSignalAbi, RiscvSyscallAbi, RiscvTrapContextAbi, + RiscvTrapMachine, +}; diff --git a/os/src/arch/riscv/trap.rs b/os/src/arch/riscv/trap.rs index 6197644a..e8fe7525 100644 --- a/os/src/arch/riscv/trap.rs +++ b/os/src/arch/riscv/trap.rs @@ -8,7 +8,13 @@ use riscv::register::{ sie, stval, stvec, }; use crate::config::TRAMPOLINE; -use crate::hal::traits::{InterruptControl, TrapCause, TrapContextAbi, TrapInfo, TrapMachine}; +use crate::hal::traits::{ + CloneArgs, InterruptControl, NamedReg, SyscallAbi, TrapCause, TrapContextAbi, TrapInfo, + TrapMachine, +}; +use crate::signal::{SignalAbi, SignalAction, SignalBit, SigSetT, StackT}; +use crate::syscall::Pod; +use crate::trap::TrapContext; global_asm!(include_str!("trap.S")); @@ -21,6 +27,12 @@ pub struct RiscvTrapMachine; /// RISC-V register-layout helpers for the common [`TrapContext`](crate::trap::TrapContext). pub struct RiscvTrapContextAbi; +/// RISC-V Linux signal ABI implementation. +pub struct RiscvSignalAbi; + +/// RISC-V legacy Linux syscall ABI implementation. +pub struct RiscvSyscallAbi; + /// RISC-V trap frame layout shared with `trap.S`. #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -45,6 +57,74 @@ pub struct RiscvTrapContextFrame { pub fcsr: usize, } +/// RISC-V musl raw `rt_sigaction` syscall layout used by this kernel. +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct RiscvUserSigAction { + pub handler: usize, + pub sa_flags: usize, + pub sa_mask: u64, +} + +impl Pod for RiscvUserSigAction {} + +impl SyscallAbi for RiscvSyscallAbi { + fn decode_clone_args(args: [usize; 6]) -> CloneArgs { + // Linux RISC-V selects CONFIG_CLONE_BACKWARDS: + // clone(flags, stack, parent_tidptr, tls, child_tidptr). + CloneArgs { + flags: args[0], + stack: args[1], + parent_tid: args[2], + tls: args[3], + child_tid: args[4], + } + } +} + +/// Floating-point state area embedded in riscv64 Linux `mcontext_t`. +#[repr(C, align(16))] +#[derive(Debug, Clone, Copy)] +pub struct RiscvFpState { + pub fpregs: [u64; 32], + pub fcsr: u32, + pub reserved: [u32; 67], +} + +impl Default for RiscvFpState { + fn default() -> Self { + Self { + fpregs: [0; 32], + fcsr: 0, + reserved: [0; 67], + } + } +} + +impl Pod for RiscvFpState {} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct RiscvMContext { + /// Slot 0 stores the saved PC on riscv64 Linux. + pub gregs: [usize; 32], + pub fpstate: RiscvFpState, +} + +impl Pod for RiscvMContext {} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct RiscvUContext { + pub uc_flags: usize, + pub uc_link: usize, + pub uc_stack: StackT, + pub uc_sigmask: SigSetT, + pub uc_mcontext: RiscvMContext, +} + +impl Pod for RiscvUContext {} + /// 用户态 `rt_sigreturn` trampoline 机器码。 const USER_VDSO_CODE: [u8; 8] = [ 0x93, 0x08, 0xb0, 0x08, // addi a7, zero, 139 @@ -119,6 +199,83 @@ impl TrapMachine for RiscvTrapMachine { } } +impl SignalAbi for RiscvSignalAbi { + type UserSigAction = RiscvUserSigAction; + type UContext = RiscvUContext; + + fn decode_user_sigaction(action: Self::UserSigAction) -> SignalAction { + SignalAction { + handler: action.handler, + sa_flags: action.sa_flags as u32, + sa_restorer: 0, + sa_mask: SignalBit::from_user_bits(action.sa_mask).bits(), + } + } + + fn encode_user_sigaction(action: SignalAction) -> Self::UserSigAction { + Self::UserSigAction { + handler: action.handler, + sa_flags: action.sa_flags as usize, + sa_mask: SignalBit::from_bits(action.sa_mask) + .unwrap_or(SignalBit::empty()) + .user_bits(), + } + } + + fn user_sigaction_parts(action: &Self::UserSigAction) -> (usize, usize, usize, u64) { + (action.handler, action.sa_flags, 0, action.sa_mask) + } + + fn build_ucontext(trap_cx: &TrapContext, old_mask: u64) -> Self::UContext { + let mut fpstate = RiscvFpState::default(); + trap_cx.copy_fp_state_to(&mut fpstate.fpregs, &mut fpstate.fcsr); + Self::UContext { + uc_flags: 0, + uc_link: 0, + uc_stack: StackT { + ss_sp: 0, + ss_flags: 0, + ss_size: 0, + }, + uc_sigmask: SigSetT::from_signal_bits(old_mask), + uc_mcontext: RiscvMContext { + gregs: trap_cx.export_signal_gprs(), + fpstate, + }, + } + } + + fn signal_mask(ucontext: &Self::UContext) -> u64 { + ucontext.uc_sigmask.low_bits() + } + + fn restore_ucontext(ucontext: &Self::UContext, trap_cx: &mut TrapContext) { + trap_cx.import_signal_gprs(&ucontext.uc_mcontext.gregs); + trap_cx.restore_fp_state( + &ucontext.uc_mcontext.fpstate.fpregs, + ucontext.uc_mcontext.fpstate.fcsr, + ); + } + + fn saved_pc(ucontext: &Self::UContext) -> usize { + ucontext.uc_mcontext.gregs[0] + } + + fn set_saved_pc(ucontext: &mut Self::UContext, pc: usize) { + ucontext.uc_mcontext.gregs[0] = pc; + } + + fn saved_arg0(ucontext: &Self::UContext) -> usize { + let index = ::signal_gpr_arg0_index(); + ucontext.uc_mcontext.gregs[index] + } + + fn set_saved_arg0(ucontext: &mut Self::UContext, value: usize) { + let index = ::signal_gpr_arg0_index(); + ucontext.uc_mcontext.gregs[index] = value; + } +} + impl TrapContextAbi for RiscvTrapContextAbi { type Frame = RiscvTrapContextFrame; @@ -242,4 +399,40 @@ impl TrapContextAbi for RiscvTrapContextAbi { frame.f.copy_from_slice(fpregs); frame.fcsr = fcsr as usize; } + + fn fault_dump_summary(frame: &Self::Frame) -> [NamedReg; 7] { + [ + NamedReg { name: "ra", value: frame.x[1] }, + NamedReg { name: "sp", value: frame.x[2] }, + NamedReg { name: "gp", value: frame.x[3] }, + NamedReg { name: "tp", value: frame.x[4] }, + NamedReg { name: "a0", value: frame.x[10] }, + NamedReg { name: "a1", value: frame.x[11] }, + NamedReg { name: "a7", value: frame.x[17] }, + ] + } + + fn fault_dump_detail(frame: &Self::Frame) -> [NamedReg; 19] { + [ + NamedReg { name: "t0", value: frame.x[5] }, + NamedReg { name: "t1", value: frame.x[6] }, + NamedReg { name: "t2", value: frame.x[7] }, + NamedReg { name: "s0", value: frame.x[8] }, + NamedReg { name: "s1", value: frame.x[9] }, + NamedReg { name: "s2", value: frame.x[18] }, + NamedReg { name: "s3", value: frame.x[19] }, + NamedReg { name: "s4", value: frame.x[20] }, + NamedReg { name: "s5", value: frame.x[21] }, + NamedReg { name: "s6", value: frame.x[22] }, + NamedReg { name: "s7", value: frame.x[23] }, + NamedReg { name: "s8", value: frame.x[24] }, + NamedReg { name: "s9", value: frame.x[25] }, + NamedReg { name: "s10", value: frame.x[26] }, + NamedReg { name: "s11", value: frame.x[27] }, + NamedReg { name: "t3", value: frame.x[28] }, + NamedReg { name: "t4", value: frame.x[29] }, + NamedReg { name: "t5", value: frame.x[30] }, + NamedReg { name: "t6", value: frame.x[31] }, + ] + } } diff --git a/os/src/hal/mod.rs b/os/src/hal/mod.rs index e5d27544..543ac44d 100644 --- a/os/src/hal/mod.rs +++ b/os/src/hal/mod.rs @@ -8,6 +8,7 @@ use crate::hal::traits::{AddressSpaceToken, HartId, PTEFlags, PagingArch}; #[cfg(target_arch = "riscv64")] pub use crate::arch::riscv::{ RiscvHartId as ArchHart, RiscvInterruptControl as ArchInterrupt, + RiscvSignalAbi as ArchSignalAbi, RiscvSyscallAbi as ArchSyscallAbi, RiscvTrapContextAbi as ArchTrapContextAbi, RiscvTrapMachine as ArchTrapMachine, Sv39Paging as ArchPaging, }; @@ -15,6 +16,7 @@ pub use crate::arch::riscv::{ #[cfg(target_arch = "loongarch64")] pub use crate::arch::loongarch64::{ LoongArchHartId as ArchHart, LoongArchInterruptControl as ArchInterrupt, + LoongArchSignalAbi as ArchSignalAbi, LoongArchSyscallAbi as ArchSyscallAbi, LoongArchTrapContextAbi as ArchTrapContextAbi, LoongArchTrapMachine as ArchTrapMachine, LoongArchPaging as ArchPaging, }; diff --git a/os/src/hal/traits.rs b/os/src/hal/traits.rs index bd464b01..8d32dda1 100644 --- a/os/src/hal/traits.rs +++ b/os/src/hal/traits.rs @@ -68,6 +68,29 @@ pub struct TrapInfo { pub fault_addr: usize, } +/// One architecture-labeled general-purpose register used in fault dumps. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct NamedReg { + pub name: &'static str, + pub value: usize, +} + +/// Architecture-normalized arguments for the legacy Linux `clone` syscall. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct CloneArgs { + pub flags: usize, + pub stack: usize, + pub parent_tid: usize, + pub tls: usize, + pub child_tid: usize, +} + +/// Architecture-specific syscall ABI details that are visible above traps. +pub trait SyscallAbi { + /// Decode raw syscall argument registers into the common legacy `clone` layout. + fn decode_clone_args(args: [usize; 6]) -> CloneArgs; +} + /// Arch-specific trap/syscall machine operations used by common code. pub trait TrapMachine { /// Read the current trap cause and associated fault address. @@ -161,6 +184,10 @@ pub trait TrapContextAbi { fn copy_fp_state_to(frame: &Self::Frame, fpregs: &mut [u64; 32], fcsr: &mut u32); /// Restore floating-point state from an external signal frame. fn restore_fp_state(frame: &mut Self::Frame, fpregs: &[u64; 32], fcsr: u32); + /// Export a compact architecture-labeled summary used by common fault logs. + fn fault_dump_summary(frame: &Self::Frame) -> [NamedReg; 7]; + /// Export a wider architecture-labeled register snapshot used by fault logs. + fn fault_dump_detail(frame: &Self::Frame) -> [NamedReg; 19]; } /// Opaque address-space activation token used by the current architecture. diff --git a/os/src/signal/action.rs b/os/src/signal/action.rs index 11b136bc..f724575a 100644 --- a/os/src/signal/action.rs +++ b/os/src/signal/action.rs @@ -125,7 +125,7 @@ impl SigInfo { } } -/// Linux `sigset_t` layout used inside `ucontext_t` on riscv64 glibc. +/// Linux `sigset_t` layout used inside `ucontext_t` on 64-bit Linux ABIs. #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct SigSetT { @@ -160,90 +160,6 @@ impl Default for SigSetT { impl Pod for SigSetT {} -/// Floating-point state area embedded in riscv64 Linux `mcontext_t`. -#[repr(C, align(16))] -#[derive(Debug, Clone, Copy)] -pub struct FpState { - /// 32 double-precision FP registers. - pub fpregs: [u64; 32], - /// Floating-point control/status register. - pub fcsr: u32, - /// Padding that preserves the glibc-visible size of the FP-state union. - pub reserved: [u32; 67], -} - -impl Default for FpState { - fn default() -> Self { - Self { - fpregs: [0; 32], - fcsr: 0, - reserved: [0; 67], - } - } -} - -impl Pod for FpState {} - -impl FpState { - /// Build an fp-state blob from the saved trap context. - pub fn from_trap_context(trap_cx: &TrapContext) -> Self { - let mut fpstate = Self::default(); - trap_cx.copy_fp_state_to(&mut fpstate.fpregs, &mut fpstate.fcsr); - fpstate - } - - /// Restore floating-point state into the saved trap context. - pub fn apply_to_trap_context(&self, trap_cx: &mut TrapContext) { - trap_cx.restore_fp_state(&self.fpregs, self.fcsr); - } -} - -/// riscv64 Linux `mcontext_t`. -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct MContext { - /// General-purpose register file. Slot 0 stores the saved PC on riscv64. - pub gregs: [usize; 32], - /// Floating-point state blob. - pub fpstate: FpState, -} - -impl Pod for MContext {} - -impl MContext { - /// Build a Linux-compatible machine context from the saved trap context. - pub fn from_trap_context(trap_cx: &TrapContext) -> Self { - Self { - gregs: trap_cx.export_signal_gprs(), - fpstate: FpState::from_trap_context(trap_cx), - } - } - - /// Restore a Linux-compatible machine context into the saved trap context. - pub fn apply_to_trap_context(&self, trap_cx: &mut TrapContext) { - trap_cx.import_signal_gprs(&self.gregs); - self.fpstate.apply_to_trap_context(trap_cx); - } -} - -/// ucontext_t structure -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct UContext { - /// Context flags - pub uc_flags: usize, - /// Link to next context - pub uc_link: usize, - /// Signal stack - pub uc_stack: StackT, - /// Signal mask - pub uc_sigmask: SigSetT, - /// Machine context (registers) - pub uc_mcontext: MContext, -} - -impl Pod for UContext {} - /// stack_t structure #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -257,3 +173,36 @@ pub struct StackT { } impl Pod for StackT {} + +/// Architecture-specific Linux signal ABI hooks. +/// +/// Concrete `rt_sigaction` and `ucontext_t` layouts differ across Linux +/// architectures. The common signal code owns delivery policy, while each +/// architecture owns the byte layout written to and read from userspace. +pub trait SignalAbi { + /// Raw userspace layout accepted by `rt_sigaction(2)`. + type UserSigAction: Copy + core::fmt::Debug + Pod; + /// Raw userspace `ucontext_t` layout used by signal frames. + type UContext: Copy + Pod; + + /// Convert a raw userspace `rt_sigaction` payload into kernel state. + fn decode_user_sigaction(action: Self::UserSigAction) -> SignalAction; + /// Convert kernel signal action state back into userspace layout. + fn encode_user_sigaction(action: SignalAction) -> Self::UserSigAction; + /// Return debug-friendly raw `rt_sigaction` fields: handler, flags, restorer, mask. + fn user_sigaction_parts(action: &Self::UserSigAction) -> (usize, usize, usize, u64); + /// Build a userspace `ucontext_t` for the interrupted trap context. + fn build_ucontext(trap_cx: &TrapContext, old_mask: u64) -> Self::UContext; + /// Return the low signal-mask bits stored in a userspace context. + fn signal_mask(ucontext: &Self::UContext) -> u64; + /// Restore the saved machine context into the trap context. + fn restore_ucontext(ucontext: &Self::UContext, trap_cx: &mut TrapContext); + /// Read the saved PC used for syscall-restart diagnostics. + fn saved_pc(ucontext: &Self::UContext) -> usize; + /// Update the saved PC before writing the context to userspace. + fn set_saved_pc(ucontext: &mut Self::UContext, pc: usize); + /// Read the saved first argument / return register. + fn saved_arg0(ucontext: &Self::UContext) -> usize; + /// Update the saved first argument / return register. + fn set_saved_arg0(ucontext: &mut Self::UContext, value: usize); +} diff --git a/os/src/signal/mod.rs b/os/src/signal/mod.rs index 6cdc7f16..e6da93ce 100644 --- a/os/src/signal/mod.rs +++ b/os/src/signal/mod.rs @@ -2,7 +2,7 @@ use crate::{ config::USER_VDSO_RT_SIGRETURN, - hal::{ArchTrapContextAbi, ArchTrapMachine, traits::{TrapContextAbi, TrapMachine}}, + hal::{ArchSignalAbi, ArchTrapContextAbi, ArchTrapMachine, traits::{TrapContextAbi, TrapMachine}}, syscall::write_pod_to_user, task::{current_task, current_trap_cx}, }; @@ -11,7 +11,7 @@ mod action; mod signals; mod wait; -pub use action::{FpState, MContext, SigInfo, SigSetT, StackT, UContext, SignalAction, SignalActions}; +pub use action::{SigInfo, SigSetT, SignalAbi, SignalAction, SignalActions, StackT}; pub use signals::{SignalBit, SignalNum, FIRST_RT_SIG, LAST_RT_SIG, MAX_SIG}; pub(crate) use wait::{ cleanup_signal_wait, cleanup_signal_wait_for_task, handle_signal_wait_timeout, @@ -211,7 +211,7 @@ pub fn handle_signals() -> Option { } // Allocate space for ucontext_t - user_sp -= core::mem::size_of::(); + user_sp -= core::mem::size_of::<::UContext>(); // Align stack to 16 bytes BEFORE setting pointers user_sp &= !0xf; @@ -219,7 +219,7 @@ pub fn handle_signals() -> Option { // Now set the pointers based on aligned sp let ucontext_ptr = user_sp; let siginfo_ptr = if action.sa_flags & SaFlags::SA_SIGINFO.bits() != 0 { - user_sp + core::mem::size_of::() + user_sp + core::mem::size_of::<::UContext>() } else { 0 }; @@ -236,7 +236,7 @@ pub fn handle_signals() -> Option { .bits() }; - let mut mcontext = MContext::from_trap_context(trap_cx); + let mut ucontext = ArchSignalAbi::build_ucontext(trap_cx, old_mask); // Syscall restart: if the signal interrupted a syscall that returned -EINTR // and SA_RESTART is set, back up PC to the ecall instruction and restore @@ -248,17 +248,20 @@ pub fn handle_signals() -> Option { let a0_idx = ::signal_gpr_arg0_index(); debug!( "handle_signals: syscall restart: backing up PC from {:#x} to {:#x}, restoring a0 (gregs[{}]) from {:#x} to {:#x}", - mcontext.gregs[0], + ArchSignalAbi::saved_pc(&ucontext), trap_cx .user_pc() .wrapping_sub(ArchTrapMachine::syscall_instruction_len()), a0_idx, - mcontext.gregs[a0_idx], trap_cx.orig_a0 + ArchSignalAbi::saved_arg0(&ucontext), trap_cx.orig_a0 ); - mcontext.gregs[0] = trap_cx - .user_pc() - .wrapping_sub(ArchTrapMachine::syscall_instruction_len()); - mcontext.gregs[a0_idx] = trap_cx.orig_a0; + ArchSignalAbi::set_saved_pc( + &mut ucontext, + trap_cx + .user_pc() + .wrapping_sub(ArchTrapMachine::syscall_instruction_len()), + ); + ArchSignalAbi::set_saved_arg0(&mut ucontext, trap_cx.orig_a0); } else if action.sa_flags & SaFlags::SA_RESTART.bits() != 0 { debug!( "handle_signals: syscall returned EINTR but syscall is not restartable, preserving EINTR" @@ -273,24 +276,12 @@ pub fn handle_signals() -> Option { } else { debug!( "handle_signals: in_syscall=false, saved PC={:#x}", - mcontext.gregs[0] + ArchSignalAbi::saved_pc(&ucontext) ); } - let ucontext = UContext { - uc_flags: 0, - uc_link: 0, - uc_stack: StackT { - ss_sp: 0, - ss_flags: 0, - ss_size: 0, - }, - uc_sigmask: SigSetT::from_signal_bits(old_mask), - uc_mcontext: mcontext, - }; - // Write ucontext to user stack - if let Err(err) = write_pod_to_user(ucontext_ptr as *mut UContext, &ucontext) { + if let Err(err) = write_pod_to_user(ucontext_ptr as *mut ::UContext, &ucontext) { warn!( "[kernel] handle_signals: failed to write ucontext for signal {}: {:?}", signum, err diff --git a/os/src/sync/futex.rs b/os/src/sync/futex.rs index 83161d53..b4503d91 100644 --- a/os/src/sync/futex.rs +++ b/os/src/sync/futex.rs @@ -249,6 +249,7 @@ pub fn futex_wait_addr( expected: i32, deadline_ns: Option, ) -> Result { + let task = current_task().unwrap(); let current = read_pod_from_user(uaddr)?; if current != expected { return Err(ERRNO::EAGAIN); @@ -262,7 +263,6 @@ pub fn futex_wait_addr( } let queue = futex_queue(uaddr as usize); - let task = current_task().unwrap(); let handle = deadline_ns .map(|_| register_futex_wait(&task).ok_or(ERRNO::EAGAIN)) .transpose()?; @@ -270,10 +270,13 @@ pub fn futex_wait_addr( add_timer_with_futex_tag(deadline_ns, Arc::clone(&task), Some(handle.timer_tag())); } queue.wait_with_reason_or_skip(WaitReason::Futex, || { - read_pod_from_user(uaddr) - .map(|current| current != expected) - .unwrap_or(true) - || handle.is_some_and(futex_wait_should_skip) + let current_after_enqueue = read_pod_from_user(uaddr); + let value_changed = current_after_enqueue + .as_ref() + .map(|current| *current != expected) + .unwrap_or(true); + let handle_ready = handle.is_some_and(futex_wait_should_skip); + value_changed || handle_ready }); if let Some(handle) = handle { let wake_state = futex_wait_state(handle); diff --git a/os/src/syscall/mman.rs b/os/src/syscall/mman.rs index 183c15b0..7ccbabe6 100644 --- a/os/src/syscall/mman.rs +++ b/os/src/syscall/mman.rs @@ -199,7 +199,7 @@ pub fn sys_mmap(addr: usize, len: usize, prot: usize, flags: usize, fd: usize, o (chosen, chosen_end, hint) }; debug!( - "[mmap] auto-selected range: pid={} start={:#x} end={:#x} hint_in={:#x} hint_out={:#x} file_backed={} shared={} lazy={}", + "[mmap-debug] auto-selected pid={} start={:#x} end={:#x} hint_in={:#x} hint_out={:#x} file_backed={} shared={} lazy={}", pid, chosen, chosen_end, @@ -230,7 +230,7 @@ pub fn sys_mmap(addr: usize, len: usize, prot: usize, flags: usize, fd: usize, o }; mapped?; debug!( - "[mmap] fixed range registered: pid={} start={:#x} end={:#x} file_backed={} shared={} lazy={}", + "[mmap-debug] fixed registered pid={} start={:#x} end={:#x} file_backed={} shared={} lazy={}", pid, addr, end, @@ -295,7 +295,7 @@ pub fn sys_munmap(start: usize, len: usize) -> isize { let end = start.checked_add(len).ok_or(ERRNO::EOVERFLOW)?; if munmap_current_process(VirtAddr::from(start), VirtAddr::from(end)) { debug!( - "[munmap] complete: pid={} start={:#x} end={:#x}", + "[munmap-debug] complete pid={} start={:#x} end={:#x}", pid, start, end diff --git a/os/src/syscall/mod.rs b/os/src/syscall/mod.rs index 308a7534..e28efe72 100644 --- a/os/src/syscall/mod.rs +++ b/os/src/syscall/mod.rs @@ -453,7 +453,13 @@ pub(crate) use fs::{bpf_prog_is_socket_filter, bpf_run_socket_filter_prog, write pub use times::Timespec; -use crate::{fs::Stat, net::SockAddrIn, syscall::errno::ERRNO}; +use crate::{ + fs::Stat, + hal::traits::SyscallAbi, + hal::ArchSyscallAbi, + net::SockAddrIn, + syscall::errno::ERRNO, +}; use crate::klog::*; /// Execute a syscall body that returns `Result`, automatically @@ -805,7 +811,7 @@ pub fn syscall(syscall_id: usize, args: [usize; 6]) -> isize { SYSCALL_SHMCTL => sys_shmctl(args[0], args[1] as i32, args[2]), SYSCALL_SHMAT => sys_shmat(args[0], args[1], args[2] as i32), SYSCALL_SHMDT => sys_shmdt(args[0]), - SYSCALL_CLONE => sys_clone(args[0], args[1], args[2], args[3], args[4]), + SYSCALL_CLONE => sys_clone(ArchSyscallAbi::decode_clone_args(args)), SYSCALL_UNSHARE => sys_unshare(args[0]), SYSCALL_SETNS => sys_setns(args[0] as i32, args[1] as i32), SYSCALL_EXECVE => sys_execve( diff --git a/os/src/syscall/process.rs b/os/src/syscall/process.rs index 8b08c398..e3ba2ab0 100644 --- a/os/src/syscall/process.rs +++ b/os/src/syscall/process.rs @@ -1,3 +1,4 @@ +use crate::hal::traits::CloneArgs; use crate::mm::{frame_allocator_stats, MapPermission, USER_SPACE_END, VirtAddr}; use crate::syscall::errno::{OrErrno, ERRNO}; use crate::syscall::{read_pod_from_user, translated_byte_buffer_with_access, write_pod_to_user, Pod}; @@ -813,14 +814,15 @@ bitflags! { /// Linux `clone` syscall。 /// /// 当前支持 fork-like 进程创建,以及 musl pthread 使用的 CLONE_VM 线程创建子集。 -pub fn sys_clone( - flags: usize, - stack: usize, - parent_tid: usize, - tls: usize, - child_tid: usize, -) -> isize { +pub fn sys_clone(args: CloneArgs) -> isize { syscall_body!({ + let CloneArgs { + flags, + stack, + parent_tid, + tls, + child_tid, + } = args; let clone_flags_arg = flags; trace!( "kernel:pid[{}] sys_clone flags={:#x} stack={:#x}", @@ -912,7 +914,6 @@ pub fn sys_clone( } let mut child_tls = None; if flags.contains(CloneFlags::CLONE_SETTLS) { - debug!("kernel: sys_clone set child TLS to {:#x}", tls); child_tls = Some(tls); } let current_process = current_process(); diff --git a/os/src/syscall/signal.rs b/os/src/syscall/signal.rs index 06b9f2c0..b2fb265a 100644 --- a/os/src/syscall/signal.rs +++ b/os/src/syscall/signal.rs @@ -1,12 +1,12 @@ -use crate::signal::{SigInfo, SignalWaitHandle, SignalWakeState, register_signal_wait}; +use crate::hal::ArchSignalAbi; +use crate::signal::{register_signal_wait, SigInfo, SignalAbi, SignalWaitHandle, SignalWakeState}; use crate::syscall::{read_pod_from_user, write_pod_to_user, Pod, ERRNO}; -use crate::task::UContext; use crate::{ mm::{PageFaultAccess, VirtAddr, translated_ref}, syscall_body, task::{ block_current_and_run_next, current_process, current_task, current_user_token, - SignalAction, SignalBit, TaskStatus, WaitReason, + SignalBit, TaskStatus, WaitReason, }, }; use crate::timer::{add_timer_with_signal_tag, get_time_ns}; @@ -151,44 +151,7 @@ fn signal_wait_sleep(handle: SignalWaitHandle, signal_set: SignalBit) { block_current_and_run_next(WaitReason::SignalTimedWait); } -/// RISC-V Linux `rt_sigaction` 用户态 ABI 布局。 -/// -/// RISC-V 不使用 `SA_RESTORER` 字段,第三个 word 是 `sigset_t` 的低 64 位。 -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct UserSigAction { - /// handler 地址,或 SIG_DFL/SIG_IGN。 - pub handler: usize, - /// Linux `SA_*` 标志位。 - pub sa_flags: usize, - /// 用户态信号掩码,当前内核按低 64 位读取并裁剪未支持的信号位。 - pub sa_mask: u64, -} - -impl Pod for UserSigAction {} - -impl From for SignalAction { - fn from(action: UserSigAction) -> Self { - Self { - handler: action.handler, - sa_flags: action.sa_flags as u32, - sa_restorer: 0, - sa_mask: SignalBit::from_user_bits(action.sa_mask).bits(), - } - } -} - -impl From for UserSigAction { - fn from(action: SignalAction) -> Self { - Self { - handler: action.handler, - sa_flags: action.sa_flags as usize, - sa_mask: SignalBit::from_bits(action.sa_mask) - .unwrap_or(SignalBit::empty()) - .user_bits(), - } - } -} +pub type UserSigAction = ::UserSigAction; fn is_valid_signal_handler_address(handler: usize) -> bool { if handler == crate::task::SIG_DFL || handler == crate::task::SIG_IGN { @@ -267,7 +230,7 @@ pub fn sys_sigaction( ), } } - let new_action = SignalAction::from(user_action); + let new_action = ArchSignalAbi::decode_user_sigaction(user_action); debug!( "sys_sigaction parsed action: handler={:#x}, flags={:#x}, mask={:#x}", new_action.handler, @@ -299,7 +262,7 @@ pub fn sys_sigaction( // Return old action after dropping process.inner; copyout may prefault user pages. if !old_action.is_null() { - let user_old = UserSigAction::from(old); + let user_old = ArchSignalAbi::encode_user_sigaction(old); write_pod_to_user(old_action, &user_old)?; debug!( "sys_sigaction: signum={}, returning old handler={:#x}, flags={:#x}", @@ -379,7 +342,10 @@ pub fn sys_sigreturn() -> isize { // Read ucontext from user stack at sp. The frame can cross a page boundary, // so this must use the byte-wise copy helper instead of translated_ref. let ucontext_ptr = user_sp; - let ucontext = read_pod_from_user(ucontext_ptr as *const UContext).map_err(|err| { + let ucontext = read_pod_from_user( + ucontext_ptr as *const ::UContext, + ) + .map_err(|err| { error!( "sys_sigreturn: failed to read ucontext at {:#x}: {:?}", ucontext_ptr, err @@ -390,22 +356,21 @@ pub fn sys_sigreturn() -> isize { debug!( "sys_sigreturn: restoring context from {:#x}, sigmask={:#x}", ucontext_ptr, - ucontext.uc_sigmask.low_bits() + ArchSignalAbi::signal_mask(&ucontext) ); // Restore signal mask { let task = current_task().unwrap(); let mut inner = task.inner_exclusive_access(); - let mask = SignalBit::from_user_bits(ucontext.uc_sigmask.low_bits()); + let mask = SignalBit::from_user_bits(ArchSignalAbi::signal_mask(&ucontext)); inner.signal_mask = mask; inner.signal_mask_backup = None; debug!("sys_sigreturn: restored signal mask to {:#x}", mask.bits()); } // Restore registers from mcontext - let mcontext = &ucontext.uc_mcontext; - mcontext.apply_to_trap_context(trap_cx); + ArchSignalAbi::restore_ucontext(&ucontext, trap_cx); debug!( "sys_sigreturn: restored sepc={:#x}, a0={:#x}", diff --git a/os/src/syscall/sync.rs b/os/src/syscall/sync.rs index 60102727..dd8af2b3 100644 --- a/os/src/syscall/sync.rs +++ b/os/src/syscall/sync.rs @@ -97,7 +97,9 @@ pub fn sys_futex( let flags = op & !FUTEX_CMD_MASK; let current = read_pod_from_user(uaddr); debug!( - "sys_futex WAIT: uaddr={:#x} expected={} current={:?} flags={:#x} timeout_ptr={:#x}", + "[futex-debug] syscall WAIT pid={} tid={} uaddr={:#x} expected={} current={:?} flags={:#x} timeout_ptr={:#x}", + current_process().getpid(), + current_tid(), uaddr as usize, val, current, @@ -119,15 +121,22 @@ pub fn sys_futex( None => None, }; let ret = futex_wait_addr(uaddr, val, deadline); - debug!( - "sys_futex WAIT result: uaddr={:#x} expected={} ret={:?}", + warn!( + "[futex-debug] syscall WAIT result pid={} tid={} uaddr={:#x} expected={} deadline={:?} ret={:?}", + current_process().getpid(), + current_tid(), uaddr as usize, val, + deadline, ret ); ret } - FUTEX_WAKE => Ok(futex_wake_addr(uaddr as usize, val.max(0) as usize)), + FUTEX_WAKE => { + let max_count = val.max(0) as usize; + let ret = futex_wake_addr(uaddr as usize, max_count); + Ok(ret) + } FUTEX_REQUEUE => { let flags = op & !FUTEX_CMD_MASK; if uaddr2 == 0 || uaddr2 & (core::mem::align_of::() - 1) != 0 @@ -149,12 +158,13 @@ pub fn sys_futex( } let src = futex_queue(uaddr as usize); let dst = futex_queue(uaddr2); - Ok(src.wake_and_requeue_with( + let ret = src.wake_and_requeue_with( &dst, val.max(0) as usize, timeout, futex_wait_mark_ready, - ) as isize) + ) as isize; + Ok(ret) } FUTEX_WAIT_BITSET => { let flags = op & !FUTEX_CMD_MASK; @@ -206,7 +216,9 @@ pub fn sys_futex( ); return Err(ERRNO::EINVAL); } - Ok(futex_wake_addr(uaddr as usize, val.max(0) as usize)) + let max_count = val.max(0) as usize; + let ret = futex_wake_addr(uaddr as usize, max_count); + Ok(ret) } _ => { warn!( diff --git a/os/src/task/mod.rs b/os/src/task/mod.rs index 6de8d989..4c5d1aa1 100644 --- a/os/src/task/mod.rs +++ b/os/src/task/mod.rs @@ -41,8 +41,8 @@ pub use crate::sched::{ yield_current_and_run_next, }; pub use crate::signal::{ - check_signals_of_current, handle_signals, MContext, MAX_SIG, SaFlags, SigInfo, SignalAction, - SignalActions, SignalBit, StackT, UContext, SIG_DFL, SIG_IGN, + check_signals_of_current, handle_signals, MAX_SIG, SaFlags, SigInfo, SignalAction, + SignalActions, SignalBit, SIG_DFL, SIG_IGN, }; pub use wait_queue::{WaitQueue, WaitQueueHandle, WaitQueueKeyed}; pub use process::{ExitReason, FdEntry, FdFlags, ProcessKeyrings, ShmAttachment}; diff --git a/os/src/trap/context.rs b/os/src/trap/context.rs index a35a3fce..27acd2b7 100644 --- a/os/src/trap/context.rs +++ b/os/src/trap/context.rs @@ -1,6 +1,6 @@ //! Implementation of [`TrapContext`] use crate::hal::ArchTrapContextAbi; -use crate::hal::traits::TrapContextAbi; +use crate::hal::traits::{NamedReg, TrapContextAbi}; #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -141,6 +141,16 @@ impl TrapContext { ArchTrapContextAbi::restore_fp_state(&mut self.arch, fpregs, fcsr); } + /// Export an architecture-labeled summary used by common fault logs. + pub fn fault_dump_summary(&self) -> [NamedReg; 7] { + ArchTrapContextAbi::fault_dump_summary(&self.arch) + } + + /// Export a wider architecture-labeled register snapshot used by fault logs. + pub fn fault_dump_detail(&self) -> [NamedReg; 19] { + ArchTrapContextAbi::fault_dump_detail(&self.arch) + } + /// init the trap context of an application pub fn app_init_context( entry: usize, @@ -150,7 +160,7 @@ impl TrapContext { trap_handler: usize, ) -> Self { unsafe { crate::hal::enable_fp() }; - let mut cx = Self { + let cx = Self { arch: ArchTrapContextAbi::new_user_frame( entry, sp, diff --git a/os/src/trap/mod.rs b/os/src/trap/mod.rs index 0fbd50ea..45f3e034 100644 --- a/os/src/trap/mod.rs +++ b/os/src/trap/mod.rs @@ -31,61 +31,96 @@ use crate::hal::traits::{InterruptControl, TrapCause, TrapMachine}; /// 输出用户态致命异常现场,区分 fault 地址、用户 PC 与关键寄存器。 fn log_user_fault(reason: &str, access: &str, fault_addr: usize, signal: &str) { let cx = current_trap_cx(); + let summary = cx.fault_dump_summary(); + let detail = cx.fault_dump_detail(); error!( - "[kernel] user fault: reason={}, access={}, pid={}, fault_addr={:#x}, user_pc={:#x}, ra={:#x}, sp={:#x}, gp={:#x}, tp={:#x}, a0={:#x}, a1={:#x}, a7={:#x}, signal={}", + "[kernel] user fault: reason={}, access={}, pid={}, fault_addr={:#x}, user_pc={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, signal={}", reason, access, current_process().getpid(), fault_addr, cx.user_pc(), - cx.ra(), - cx.user_sp(), - cx.reg(3), - cx.tls(), - cx.reg(10), - cx.reg(11), - cx.syscall_nr(), + summary[0].name, + summary[0].value, + summary[1].name, + summary[1].value, + summary[2].name, + summary[2].value, + summary[3].name, + summary[3].value, + summary[4].name, + summary[4].value, + summary[5].name, + summary[5].value, + summary[6].name, + summary[6].value, signal, ); error!( - "[kernel] user fault regs: t0={:#x}, t1={:#x}, t2={:#x}, s0={:#x}, s1={:#x}, s2={:#x}, s3={:#x}, s4={:#x}, s5={:#x}, s6={:#x}, s7={:#x}, s8={:#x}, s9={:#x}, s10={:#x}, s11={:#x}, t3={:#x}, t4={:#x}, t5={:#x}, t6={:#x}", - cx.reg(5), - cx.reg(6), - cx.reg(7), - cx.reg(8), - cx.reg(9), - cx.reg(18), - cx.reg(19), - cx.reg(20), - cx.reg(21), - cx.reg(22), - cx.reg(23), - cx.reg(24), - cx.reg(25), - cx.reg(26), - cx.reg(27), - cx.reg(28), - cx.reg(29), - cx.reg(30), - cx.reg(31), + "[kernel] user fault regs: {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}", + detail[0].name, + detail[0].value, + detail[1].name, + detail[1].value, + detail[2].name, + detail[2].value, + detail[3].name, + detail[3].value, + detail[4].name, + detail[4].value, + detail[5].name, + detail[5].value, + detail[6].name, + detail[6].value, + detail[7].name, + detail[7].value, + detail[8].name, + detail[8].value, + detail[9].name, + detail[9].value, + detail[10].name, + detail[10].value, + detail[11].name, + detail[11].value, + detail[12].name, + detail[12].value, + detail[13].name, + detail[13].value, + detail[14].name, + detail[14].value, + detail[15].name, + detail[15].value, + detail[16].name, + detail[16].value, + detail[17].name, + detail[17].value, + detail[18].name, + detail[18].value, ); } fn log_lazy_fault_oom(path: &str, access: &str, fault_addr: usize) { let cx = current_trap_cx(); + let summary = cx.fault_dump_summary(); error!( - "[kernel] fatal lazy-fault OOM: path={}, access={}, pid={}, fault_addr={:#x}, user_pc={:#x}, ra={:#x}, sp={:#x}, tp={:#x}, a0={:#x}, a1={:#x}, a7={:#x}", + "[kernel] fatal lazy-fault OOM: path={}, access={}, pid={}, fault_addr={:#x}, user_pc={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}, {}={:#x}", path, access, current_process().getpid(), fault_addr, cx.user_pc(), - cx.ra(), - cx.user_sp(), - cx.tls(), - cx.reg(10), - cx.reg(11), - cx.syscall_nr(), + summary[0].name, + summary[0].value, + summary[1].name, + summary[1].value, + summary[3].name, + summary[3].value, + summary[4].name, + summary[4].value, + summary[5].name, + summary[5].value, + summary[6].name, + summary[6].value, ); } From ce83f219faef8da1d9c477a28f6df8f6d0584a2e Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 16 Jun 2026 22:38:10 +0800 Subject: [PATCH 2/4] feat: Support syscall `clone3` (435); save complete registers in kernel trap. --- Makefile | 3 + os/src/arch/loongarch64/trap.S | 40 +++++++++- os/src/config.rs | 2 +- os/src/syscall/mod.rs | 3 + os/src/syscall/process.rs | 142 ++++++++++++++++++++++++++++++--- user/src/lib.rs | 5 ++ user/src/syscall.rs | 21 +++++ 7 files changed, 199 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 666db597..2f265c0c 100644 --- a/Makefile +++ b/Makefile @@ -319,6 +319,9 @@ fast-run: check-kernel fast-run-la: check-kernel-la $(LA_BOOTLOADER_ELF) $(QEMU_LA) -machine virt -cpu la464 -kernel $(LA_BOOTLOADER_ELF) -device loader,file=kernel-la,addr=$(LA_KERNEL_ENTRY_PA) -m $(MEM_LA) -nographic -smp $(SMP) $(QEMU_LA_BLK_ARGS) -device virtio-net-pci,netdev=net0,id=net0 -netdev $(QEMU_LA_NETDEV) -no-reboot -rtc base=utc $(QEMU_LA_EXTRA_BLK_ARGS) +fast-run-la-trace: check-kernel-la $(LA_BOOTLOADER_ELF) + $(QEMU_LA) -machine virt -cpu la464 -kernel $(LA_BOOTLOADER_ELF) -device loader,file=kernel-la,addr=$(LA_KERNEL_ENTRY_PA) -m $(MEM_LA) -nographic -smp $(SMP) $(QEMU_LA_BLK_ARGS) -device virtio-net-pci,netdev=net0,id=net0 -netdev $(QEMU_LA_NETDEV) -no-reboot -rtc base=utc $(QEMU_LA_EXTRA_BLK_ARGS) -d int,in_asm -D qemu.log + run-trace: QEMU_TRACE_ARGS = -d int,in_asm -D qemu.log run-trace: run diff --git a/os/src/arch/loongarch64/trap.S b/os/src/arch/loongarch64/trap.S index db305964..ec02cd99 100644 --- a/os/src/arch/loongarch64/trap.S +++ b/os/src/arch/loongarch64/trap.S @@ -90,18 +90,50 @@ __restore: .globl __trap_from_kernel .align 3 __trap_from_kernel: + # Save all caller-saved GPRs (LoongArch ELF ABI: ra, a0-a7, t0-t8). + # The Rust trap_from_kernel and its callees freely clobber these + # registers; failing to preserve them corrupts the interrupted + # kernel code when the handler returns via ertn. addi.d $sp, $sp, -256 st.d $ra, $sp, 0 st.d $a0, $sp, 8 st.d $a1, $sp, 16 - st.d $t0, $sp, 24 - st.d $t1, $sp, 32 + st.d $a2, $sp, 24 + st.d $a3, $sp, 32 + st.d $a4, $sp, 40 + st.d $a5, $sp, 48 + st.d $a6, $sp, 56 + st.d $a7, $sp, 64 + st.d $t0, $sp, 72 + st.d $t1, $sp, 80 + st.d $t2, $sp, 88 + st.d $t3, $sp, 96 + st.d $t4, $sp, 104 + st.d $t5, $sp, 112 + st.d $t6, $sp, 120 + st.d $t7, $sp, 128 + st.d $t8, $sp, 136 + la.local $t0, trap_from_kernel jirl $ra, $t0, 0 + ld.d $ra, $sp, 0 ld.d $a0, $sp, 8 ld.d $a1, $sp, 16 - ld.d $t0, $sp, 24 - ld.d $t1, $sp, 32 + ld.d $a2, $sp, 24 + ld.d $a3, $sp, 32 + ld.d $a4, $sp, 40 + ld.d $a5, $sp, 48 + ld.d $a6, $sp, 56 + ld.d $a7, $sp, 64 + ld.d $t0, $sp, 72 + ld.d $t1, $sp, 80 + ld.d $t2, $sp, 88 + ld.d $t3, $sp, 96 + ld.d $t4, $sp, 104 + ld.d $t5, $sp, 112 + ld.d $t6, $sp, 120 + ld.d $t7, $sp, 128 + ld.d $t8, $sp, 136 addi.d $sp, $sp, 256 ertn diff --git a/os/src/config.rs b/os/src/config.rs index cf3379f6..dcba8ceb 100644 --- a/os/src/config.rs +++ b/os/src/config.rs @@ -11,7 +11,7 @@ pub const MAX_KERNEL_HEAP_SIZE: usize = 0x4000_0000; /// max harts reserved by the kernel SMP bootstrap path pub const MAX_HARTS: usize = 8; /// QEMU virt 1GiB 内存的物理结束地址,起始地址为 0x8000_0000。 -pub const MEMORY_END: usize = 0xC0000000; +pub const MEMORY_END: usize = 0xB0000000; /// page size : 4KB pub const PAGE_SIZE: usize = 0x1000; /// page size bits: 12 diff --git a/os/src/syscall/mod.rs b/os/src/syscall/mod.rs index e28efe72..f4de6110 100644 --- a/os/src/syscall/mod.rs +++ b/os/src/syscall/mod.rs @@ -294,6 +294,8 @@ pub const SYSCALL_KEYCTL: usize = 219; pub const SYSCALL_MUNMAP: usize = 215; /// clone syscall pub const SYSCALL_CLONE: usize = 220; +/// clone3 syscall +pub const SYSCALL_CLONE3: usize = 435; /// execve syscall pub const SYSCALL_EXECVE: usize = 221; /// mmap syscall @@ -812,6 +814,7 @@ pub fn syscall(syscall_id: usize, args: [usize; 6]) -> isize { SYSCALL_SHMAT => sys_shmat(args[0], args[1], args[2] as i32), SYSCALL_SHMDT => sys_shmdt(args[0]), SYSCALL_CLONE => sys_clone(ArchSyscallAbi::decode_clone_args(args)), + SYSCALL_CLONE3 => sys_clone3(args[0] as *const Clone3Args, args[1]), SYSCALL_UNSHARE => sys_unshare(args[0]), SYSCALL_SETNS => sys_setns(args[0] as i32, args[1] as i32), SYSCALL_EXECVE => sys_execve( diff --git a/os/src/syscall/process.rs b/os/src/syscall/process.rs index e3ba2ab0..82e69c9a 100644 --- a/os/src/syscall/process.rs +++ b/os/src/syscall/process.rs @@ -1,16 +1,17 @@ use crate::hal::traits::CloneArgs; use crate::mm::{frame_allocator_stats, MapPermission, USER_SPACE_END, VirtAddr}; use crate::syscall::errno::{OrErrno, ERRNO}; -use crate::syscall::{read_pod_from_user, translated_byte_buffer_with_access, write_pod_to_user, Pod}; +use crate::syscall::{ + read_bytes_from_user, read_pod_from_user, write_pod_to_user, Pod, +}; use crate::syscall_body; -use crate::task::yield_current_and_run_next; use crate::timer::get_time_ns; use crate::{ config::PAGE_SIZE, fs::{canonicalize, open_file, open_file_at, File, OpenFlags}, hal::hartid, ipc::{self, IPC_RMID}, - mm::{translated_ref, translated_str, PageFaultAccess}, + mm::{translated_ref, translated_str}, task::{ current_process, current_task, current_trap_cx, current_user_token, exit_current_and_run_next, exit_group_current_and_run_next, thread_id2task, ExitReason, @@ -19,7 +20,7 @@ use crate::{ }; use crate::sched::{add_task, list_pids, pid2process, remove_from_pid2process}; -use alloc::{string::String, sync::Arc, vec, vec::Vec}; +use alloc::{string::String, sync::Arc, vec::Vec}; const UID_NO_CHANGE: u32 = u32::MAX; const LINUX_CAPABILITY_VERSION_1: u32 = 0x1998_0330; @@ -769,6 +770,85 @@ const CLONE_EXIT_SIGNAL_SIGCHLD: usize = 17; /// Linux clone flags 中低 8 位保存退出信号。 const CLONE_EXIT_SIGNAL_MASK: usize = 0xff; +/// Linux `clone3` 的用户态参数布局。 +#[repr(C)] +#[derive(Debug, Clone, Copy, Default)] +pub struct Clone3Args { + pub flags: u64, + pub pidfd: u64, + pub child_tid: u64, + pub parent_tid: u64, + pub exit_signal: u64, + pub stack: u64, + pub stack_size: u64, + pub tls: u64, + pub set_tid: u64, + pub set_tid_size: u64, + pub cgroup: u64, +} + +const CLONE3_ARGS_SIZE: usize = core::mem::size_of::(); + +#[derive(Debug, Clone, Copy)] +struct CloneRequest { + flags: usize, + exit_signal: usize, + stack: usize, + stack_size: usize, + parent_tid: usize, + tls: usize, + child_tid: usize, +} + +impl CloneRequest { + fn from_legacy(args: CloneArgs) -> Self { + Self { + flags: args.flags, + exit_signal: args.flags & CLONE_EXIT_SIGNAL_MASK, + stack: args.stack, + stack_size: 0, + parent_tid: args.parent_tid, + tls: args.tls, + child_tid: args.child_tid, + } + } + + fn from_clone3(args: Clone3Args, size: usize) -> Result { + if size == 0 { + return Err(ERRNO::EINVAL); + } + if size > CLONE3_ARGS_SIZE { + return Err(ERRNO::E2BIG); + } + let flags = args.flags as usize; + if flags & CLONE_EXIT_SIGNAL_MASK != 0 { + warn!("kernel: sys_clone3 flags may not contain an exit signal"); + return Err(ERRNO::EINVAL); + } + if args.pidfd != 0 || args.set_tid != 0 || args.set_tid_size != 0 || args.cgroup != 0 { + warn!("kernel: sys_clone3 unsupported pidfd/set_tid/cgroup fields"); + return Err(ERRNO::EINVAL); + } + let exit_signal = args.exit_signal as usize; + if exit_signal != 0 && exit_signal != CLONE_EXIT_SIGNAL_SIGCHLD { + warn!( + "kernel: sys_clone3 unsupported exit signal {}, only SIGCHLD/0 is implemented", + exit_signal + ); + return Err(ERRNO::EINVAL); + } + Ok(Self { + flags, + exit_signal, + stack: args.stack as usize, + stack_size: args.stack_size as usize, + parent_tid: args.parent_tid as usize, + tls: args.tls as usize, + child_tid: args.child_tid as usize, + }) + } +} + bitflags! { /// Linux `clone` 的功能标志位,不包含低 8 位退出信号。 struct CloneFlags: usize { @@ -815,20 +895,27 @@ bitflags! { /// /// 当前支持 fork-like 进程创建,以及 musl pthread 使用的 CLONE_VM 线程创建子集。 pub fn sys_clone(args: CloneArgs) -> isize { + sys_clone_request(CloneRequest::from_legacy(args)) +} + +fn sys_clone_request(req: CloneRequest) -> isize { syscall_body!({ - let CloneArgs { + let CloneRequest { flags, + exit_signal, stack, + stack_size, parent_tid, tls, child_tid, - } = args; - let clone_flags_arg = flags; + } = req; + let clone_flags_arg = flags | exit_signal; trace!( - "kernel:pid[{}] sys_clone flags={:#x} stack={:#x}", + "kernel:pid[{}] sys_clone flags={:#x} stack={:#x} stack_size={:#x}", current_task().unwrap().process.upgrade().unwrap().getpid(), - flags, + clone_flags_arg, stack, + stack_size, ); debug!( "kernel: sys_clone enter flags={:#x} parent_tid={:#x} child_tid={:#x}", @@ -837,7 +924,6 @@ pub fn sys_clone(args: CloneArgs) -> isize { child_tid ); - let exit_signal = flags & CLONE_EXIT_SIGNAL_MASK; let raw_clone_flags = flags & !CLONE_EXIT_SIGNAL_MASK; let flags = CloneFlags::from_bits_truncate(raw_clone_flags); let unsupported_flags = raw_clone_flags & !CloneFlags::all().bits(); @@ -934,6 +1020,15 @@ pub fn sys_clone(args: CloneArgs) -> isize { .map_err(|_| ERRNO::ENOMEM)?; let new_tid = new_task.inner_exclusive_access().res.as_ref().unwrap().thread_id() as i32; let new_inner_tid = new_task.inner_exclusive_access().res.as_ref().unwrap().tid; + let child_user_sp = if stack_size != 0 { + if stack == 0 { + warn!("kernel: sys_clone clone3 stack_size set without a stack base"); + return Err(ERRNO::EINVAL); + } + stack.checked_add(stack_size).ok_or(ERRNO::EINVAL)? + } else { + stack + }; debug!( "kernel: sys_clone thread: new_tid(thread_id)={} inner_tid={} parent_set_tid_addr={:#x} child_set_tid_addr={:#x} clear_child_tid_addr={:#x}", new_tid, @@ -953,8 +1048,8 @@ pub fn sys_clone(args: CloneArgs) -> isize { *trap_cx = inherited_cx; trap_cx.set_kernel_sp(new_task.kstack.get_top()); trap_cx.set_syscall_ret(0); - if stack != 0 { - trap_cx.set_user_sp(stack); + if child_user_sp != 0 { + trap_cx.set_user_sp(child_user_sp); } if let Some(tls) = child_tls { trap_cx.set_tls(tls); @@ -992,6 +1087,29 @@ pub fn sys_clone(args: CloneArgs) -> isize { }) } +/// Linux `clone3` syscall。 +pub fn sys_clone3(uargs: *const Clone3Args, size: usize) -> isize { + syscall_body!({ + if uargs.is_null() { + return Err(ERRNO::EFAULT); + } + let mut args = Clone3Args::default(); + let copy_len = size.min(CLONE3_ARGS_SIZE); + if copy_len != 0 { + let bytes = read_bytes_from_user(uargs as *const u8, copy_len)?; + unsafe { + core::ptr::copy_nonoverlapping( + bytes.as_ptr(), + &mut args as *mut Clone3Args as *mut u8, + copy_len, + ); + } + } + let req = CloneRequest::from_clone3(args, size)?; + Ok(sys_clone_request(req)) + }) +} + /// `setns` 兼容实现。 /// /// 当前内核尚未提供独立 namespace 隔离,但 LTP 的网络 helper 需要 diff --git a/user/src/lib.rs b/user/src/lib.rs index 35082198..56344561 100644 --- a/user/src/lib.rs +++ b/user/src/lib.rs @@ -610,6 +610,11 @@ pub fn fork() -> isize { sys_clone(SIGCHLD as usize, 0, 0, 0, 0) } +/// Linux `clone3` 系统调用的便捷封装。 +pub fn clone3(args: &Clone3Args) -> isize { + sys_clone3(args as *const _, core::mem::size_of::()) +} + /// 兼容接口:执行程序(不传环境变量),内部等价于 `execve(path, args, [NULL])`。 pub fn exec(path: &str, args: &[*const u8]) -> isize { let envp: [*const u8; 1] = [core::ptr::null()]; diff --git a/user/src/syscall.rs b/user/src/syscall.rs index 08f24749..d1fc8470 100644 --- a/user/src/syscall.rs +++ b/user/src/syscall.rs @@ -71,6 +71,7 @@ pub const SYSCALL_SENDMSG: usize = 211; pub const SYSCALL_RECVMSG: usize = 212; pub const SYSCALL_GETTID: usize = 178; pub const SYSCALL_CLONE: usize = 220; +pub const SYSCALL_CLONE3: usize = 435; pub const SYSCALL_EXECVE: usize = 221; pub const SYSCALL_WAITPID: usize = 260; pub const SYSCALL_RENAMEAT2: usize = 276; @@ -94,6 +95,22 @@ pub const SYSCALL_CONDVAR_CREATE: usize = 471; pub const SYSCALL_CONDVAR_SIGNAL: usize = 472; pub const SYSCALL_CONDVAR_WAIT: usize = 473; +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct Clone3Args { + pub flags: u64, + pub pidfd: u64, + pub child_tid: u64, + pub parent_tid: u64, + pub exit_signal: u64, + pub stack: u64, + pub stack_size: u64, + pub tls: u64, + pub set_tid: u64, + pub set_tid_size: u64, + pub cgroup: u64, +} + #[cfg(target_arch = "riscv64")] pub fn syscall(id: usize, args: [usize; 3]) -> isize { let mut ret: isize; @@ -538,6 +555,10 @@ pub fn sys_clone( ) } +pub fn sys_clone3(uargs: *const Clone3Args, size: usize) -> isize { + syscall(SYSCALL_CLONE3, [uargs as usize, size, 0]) +} + pub fn sys_execve(path: &str, args: &[*const u8], envp: &[*const u8]) -> isize { syscall( SYSCALL_EXECVE, From 0d5ddd9e7e045c7f848a67df496f603887051b9b Mon Sep 17 00:00:00 2001 From: Kyle Date: Wed, 17 Jun 2026 11:05:31 +0800 Subject: [PATCH 3/4] fix: Acquire bootinfo in LA. Support non-continuous physical memory. --- Makefile | 3 + bootloader/loongarch64-direct/src/main.rs | 5 +- os/src/bootinfo.rs | 625 +++++++++++++++++++++ os/src/config.rs | 4 +- os/src/fs/page_cache.rs | 19 +- os/src/main.rs | 15 +- os/src/mm/frame_allocator.rs | 121 +++- os/src/mm/heap_allocator.rs | 20 +- os/src/mm/memory_set.rs | 50 +- os/src/platform/loongarch/qemu_virt/mod.rs | 8 +- 10 files changed, 810 insertions(+), 60 deletions(-) create mode 100644 os/src/bootinfo.rs diff --git a/Makefile b/Makefile index 2f265c0c..b0fde9e0 100644 --- a/Makefile +++ b/Makefile @@ -316,6 +316,9 @@ run-la: check-kernel-la $(LA_BOOTLOADER_ELF) $(DISK_LA_IMG) prepare-run-test-fs- fast-run: check-kernel $(QEMU) -machine virt -kernel kernel-rv -m $(MEM) -nographic -smp $(SMP) -bios default $(QEMU_COMP_BLK_ARGS) -device virtio-net-device,netdev=net -netdev $(FAST_RUN_QEMU_NETDEV) -no-reboot -rtc base=utc $(QEMU_COMP_EXTRA_BLK_ARGS) $(QEMU_TRACE_ARGS) +fast-run-trace: check-kernel + $(QEMU) -machine virt -kernel kernel-rv -m $(MEM) -nographic -smp $(SMP) -bios default $(QEMU_COMP_BLK_ARGS) -device virtio-net-device,netdev=net -netdev $(FAST_RUN_QEMU_NETDEV) -no-reboot -rtc base=utc $(QEMU_COMP_EXTRA_BLK_ARGS) $(QEMU_TRACE_ARGS) -d int,in_asm -D qemu.log + fast-run-la: check-kernel-la $(LA_BOOTLOADER_ELF) $(QEMU_LA) -machine virt -cpu la464 -kernel $(LA_BOOTLOADER_ELF) -device loader,file=kernel-la,addr=$(LA_KERNEL_ENTRY_PA) -m $(MEM_LA) -nographic -smp $(SMP) $(QEMU_LA_BLK_ARGS) -device virtio-net-pci,netdev=net0,id=net0 -netdev $(QEMU_LA_NETDEV) -no-reboot -rtc base=utc $(QEMU_LA_EXTRA_BLK_ARGS) diff --git a/bootloader/loongarch64-direct/src/main.rs b/bootloader/loongarch64-direct/src/main.rs index 547117f1..1b16ba03 100644 --- a/bootloader/loongarch64-direct/src/main.rs +++ b/bootloader/loongarch64-direct/src/main.rs @@ -10,6 +10,7 @@ const UART_THR: usize = 0x00; const UART_LSR: usize = 0x05; const UART_LSR_THRE: u8 = 1 << 5; const KERNEL_ENTRY: usize = 0x9000_0000_9000_0000; +const FDT_BASE: usize = 0x0010_0000; #[unsafe(no_mangle)] static mut BOOT_STACK: [u8; 4096] = [0; 4096]; @@ -45,8 +46,8 @@ extern "C" fn boot_main(hart_id: usize) -> ! { puts("[boot] jumping to kernel @ 0x90000000\r\n"); } unsafe { - let entry: extern "C" fn(usize) -> ! = core::mem::transmute(KERNEL_ENTRY); - entry(hart_id); + let entry: extern "C" fn(usize, usize) -> ! = core::mem::transmute(KERNEL_ENTRY); + entry(hart_id, FDT_BASE); } } diff --git a/os/src/bootinfo.rs b/os/src/bootinfo.rs new file mode 100644 index 00000000..4e02a445 --- /dev/null +++ b/os/src/bootinfo.rs @@ -0,0 +1,625 @@ +//! Early boot information discovered from firmware device trees. + +use core::cmp::{max, min}; +use core::ptr; +use core::sync::atomic::{AtomicBool, Ordering}; + +use crate::config::MAX_HARTS; +use crate::console::print; + +const MAX_MEMORY_REGIONS: usize = 8; +const MAX_RESERVED_REGIONS: usize = 16; +const FDT_MAGIC: u32 = 0xd00d_feed; +const FDT_BEGIN_NODE: u32 = 1; +const FDT_END_NODE: u32 = 2; +const FDT_PROP: u32 = 3; +const FDT_NOP: u32 = 4; +const FDT_END: u32 = 9; + +/// One physical memory byte range. +#[derive(Clone, Copy, Debug)] +pub struct PhysMemoryRegion { + /// Inclusive physical start address. + pub start: usize, + /// Exclusive physical end address. + pub end: usize, +} + +impl PhysMemoryRegion { + const fn empty() -> Self { + Self { start: 0, end: 0 } + } + + const fn new(start: usize, end: usize) -> Self { + Self { start, end } + } + + fn is_empty(&self) -> bool { + self.start >= self.end + } +} + +/// Fixed-capacity boot information available before the heap is initialized. +#[derive(Clone, Copy, Debug)] +pub struct BootInfo { + memory_regions: [PhysMemoryRegion; MAX_MEMORY_REGIONS], + memory_region_count: usize, + reserved_regions: [PhysMemoryRegion; MAX_RESERVED_REGIONS], + reserved_region_count: usize, + hart_count: usize, + fdt_ptr: usize, + fdt_size: usize, +} + +impl BootInfo { + const fn empty() -> Self { + Self { + memory_regions: [PhysMemoryRegion::empty(); MAX_MEMORY_REGIONS], + memory_region_count: 0, + reserved_regions: [PhysMemoryRegion::empty(); MAX_RESERVED_REGIONS], + reserved_region_count: 0, + hart_count: 0, + fdt_ptr: 0, + fdt_size: 0, + } + } + + fn push_memory_region(&mut self, start: usize, end: usize) { + if start >= end || self.memory_region_count >= MAX_MEMORY_REGIONS { + return; + } + self.memory_regions[self.memory_region_count] = PhysMemoryRegion::new(start, end); + self.memory_region_count += 1; + } + + fn push_reserved_region(&mut self, start: usize, end: usize) { + if start >= end || self.reserved_region_count >= MAX_RESERVED_REGIONS { + return; + } + self.reserved_regions[self.reserved_region_count] = PhysMemoryRegion::new(start, end); + self.reserved_region_count += 1; + } + + fn set_hart_count(&mut self, hart_count: usize) { + self.hart_count = hart_count.clamp(1, MAX_HARTS); + } + + /// Return the firmware RAM ranges. + pub fn memory_regions(&self) -> &[PhysMemoryRegion] { + &self.memory_regions[..self.memory_region_count] + } + + /// Return physical regions reserved by firmware or by the boot protocol. + pub fn reserved_regions(&self) -> &[PhysMemoryRegion] { + &self.reserved_regions[..self.reserved_region_count] + } + + /// Return the discovered hart count. + pub fn hart_count(&self) -> usize { + self.hart_count + } + + /// Return the FDT virtual address and size used for discovery, when known. + pub fn fdt_blob(&self) -> Option<(usize, usize)> { + (self.fdt_ptr != 0 && self.fdt_size != 0).then_some((self.fdt_ptr, self.fdt_size)) + } +} + +static READY: AtomicBool = AtomicBool::new(false); + +/// The global boot information instance, initialized by the bootstrap hart and read by secondary harts. +pub static mut BOOT_INFO: BootInfo = BootInfo::empty(); + +/// Initialize global boot information from an optional FDT pointer. +pub fn init(fdt_ptr: usize) { + let mut info = BootInfo::empty(); + let mut source = FdtSource::from_ptr(fdt_ptr); + + if source.is_none() { + source = platform_fdt_source(); + } + + if let Some(source) = source { + if let Some(fdt) = Fdt::new(source.ptr) { + fdt.fill_boot_info(&mut info); + info.fdt_ptr = source.ptr; + info.fdt_size = fdt.total_size; + if source.reserve_physical_blob { + let fdt_pa = crate::platform::direct_map_virt_to_phys(source.ptr); + info.push_reserved_region(fdt_pa, fdt_pa.saturating_add(fdt.total_size)); + } + } + } + + if info.memory_region_count == 0 { + fallback_memory_regions(&mut info); + } + platform_reserved_regions(&mut info); + if info.hart_count == 0 { + info.hart_count = 1; + } + + unsafe { + ptr::write(ptr::addr_of_mut!(BOOT_INFO), info); + } + READY.store(true, Ordering::Release); +} + +/// Return the currently discovered boot information. +pub fn get() -> &'static BootInfo { + if !READY.load(Ordering::Acquire) { + return fallback_boot_info(); + } + unsafe { &*ptr::addr_of!(BOOT_INFO) } +} + +/// Return the discovered hart count. +pub fn hart_count() -> usize { + get().hart_count() +} + +/// Iterate usable RAM ranges after subtracting reserved ranges and call `f`. +pub fn for_each_usable_memory_region(mut f: impl FnMut(PhysMemoryRegion)) { + let info = get(); + for region in info.memory_regions() { + let mut fragments = [PhysMemoryRegion::empty(); MAX_RESERVED_REGIONS + 1]; + let mut fragment_count = 1usize; + fragments[0] = *region; + + for reserved in info.reserved_regions() { + let mut next = [PhysMemoryRegion::empty(); MAX_RESERVED_REGIONS + 1]; + let mut next_count = 0usize; + for fragment in fragments[..fragment_count].iter().copied() { + subtract_region(fragment, *reserved, &mut next, &mut next_count); + } + fragments = next; + fragment_count = next_count; + } + + for fragment in fragments[..fragment_count].iter().copied() { + if !fragment.is_empty() { + f(fragment); + } + } + } +} + +fn subtract_region( + region: PhysMemoryRegion, + reserved: PhysMemoryRegion, + out: &mut [PhysMemoryRegion], + out_count: &mut usize, +) { + let overlap_start = max(region.start, reserved.start); + let overlap_end = min(region.end, reserved.end); + if overlap_start >= overlap_end { + push_temp_region(region, out, out_count); + return; + } + push_temp_region(PhysMemoryRegion::new(region.start, overlap_start), out, out_count); + push_temp_region(PhysMemoryRegion::new(overlap_end, region.end), out, out_count); +} + +fn push_temp_region(region: PhysMemoryRegion, out: &mut [PhysMemoryRegion], out_count: &mut usize) { + if region.is_empty() || *out_count >= out.len() { + return; + } + out[*out_count] = region; + *out_count += 1; +} + +fn fallback_boot_info() -> &'static BootInfo { + static mut FALLBACK: BootInfo = BootInfo::empty(); + static FALLBACK_READY: AtomicBool = AtomicBool::new(false); + if !FALLBACK_READY.load(Ordering::Acquire) { + let mut info = BootInfo::empty(); + fallback_memory_regions(&mut info); + platform_reserved_regions(&mut info); + info.hart_count = 1; + unsafe { + ptr::write(ptr::addr_of_mut!(FALLBACK), info); + } + FALLBACK_READY.store(true, Ordering::Release); + } + unsafe { &*ptr::addr_of!(FALLBACK) } +} + +fn fallback_memory_regions(info: &mut BootInfo) { + info.push_memory_region(fallback_memory_start(), crate::config::MEMORY_END); +} + +#[cfg(target_arch = "riscv64")] +fn platform_reserved_regions(info: &mut BootInfo) { + // RustSBI/OpenSBI occupies the low part of QEMU virt RAM and protects it + // with PMP. It is usually not described as reserved in the payload DTB, so + // do not let the S-mode frame allocator write freelist metadata there. + info.push_reserved_region(0x8000_0000, 0x8020_0000); +} + +#[cfg(target_arch = "loongarch64")] +fn platform_reserved_regions(_info: &mut BootInfo) {} + +#[cfg(target_arch = "riscv64")] +fn fallback_memory_start() -> usize { + 0x8000_0000 +} + +#[cfg(target_arch = "loongarch64")] +fn fallback_memory_start() -> usize { + 0x8000_0000 +} + +#[derive(Clone, Copy)] +struct FdtSource { + ptr: usize, + reserve_physical_blob: bool, +} + +impl FdtSource { + fn from_ptr(ptr: usize) -> Option { + if ptr == 0 { + return None; + } + + #[cfg(target_arch = "loongarch64")] + let ptr = if ptr < crate::platform::KERNEL_ADDR_OFFSET { + crate::platform::direct_map_phys_to_virt(ptr) + } else { + ptr + }; + + (ptr != 0).then_some(Self { + ptr, + reserve_physical_blob: true, + }) + } +} + +#[cfg(target_arch = "loongarch64")] +fn platform_fdt_source() -> Option { + fw_cfg::load_fdt() +} + +#[cfg(not(target_arch = "loongarch64"))] +fn platform_fdt_source() -> Option { + None +} + +struct Fdt { + base: usize, + total_size: usize, + off_dt_struct: usize, + off_dt_strings: usize, + size_dt_strings: usize, +} + +impl Fdt { + fn new(base: usize) -> Option { + let magic = read_be_u32_at(base)?; + if magic != FDT_MAGIC { + return None; + } + let total_size = read_be_u32_at(base + 4)? as usize; + let off_dt_struct = read_be_u32_at(base + 8)? as usize; + let off_dt_strings = read_be_u32_at(base + 12)? as usize; + let size_dt_strings = read_be_u32_at(base + 36)? as usize; + if total_size < 40 + || off_dt_struct >= total_size + || off_dt_strings >= total_size + || off_dt_strings.saturating_add(size_dt_strings) > total_size + { + return None; + } + Some(Self { + base, + total_size, + off_dt_struct, + off_dt_strings, + size_dt_strings, + }) + } + + fn fill_boot_info(&self, info: &mut BootInfo) { + self.parse_mem_reserve(info); + let mut cursor = self.base + self.off_dt_struct; + let end = self.base + self.total_size; + let mut depth = 0usize; + let mut current = NodeState::default(); + let mut stack = [NodeState::default(); 16]; + + while cursor + 4 <= end { + let Some(token) = read_be_u32_at(cursor) else { + break; + }; + cursor += 4; + match token { + FDT_BEGIN_NODE => { + if depth < stack.len() { + stack[depth] = current; + } + let name_start = cursor; + while cursor < end && read_u8_at(cursor) != Some(0) { + cursor += 1; + } + let name = bytes_at(name_start, cursor.saturating_sub(name_start)).unwrap_or(&[]); + cursor = align4(cursor.saturating_add(1)); + current = NodeState::for_child(stack.get(depth).copied().unwrap_or_default(), name); + depth += 1; + } + FDT_END_NODE => { + current.finish(info); + depth = depth.saturating_sub(1); + current = stack.get(depth).copied().unwrap_or_default(); + } + FDT_PROP => { + if cursor + 8 > end { + break; + } + let len = read_be_u32_at(cursor).unwrap_or(0) as usize; + let nameoff = read_be_u32_at(cursor + 4).unwrap_or(usize::MAX as u32) as usize; + cursor += 8; + let Some(prop_name) = self.string(nameoff) else { + cursor = align4(cursor.saturating_add(len)); + continue; + }; + let value = bytes_at(cursor, len).unwrap_or(&[]); + current.apply_property(prop_name, value, info); + cursor = align4(cursor.saturating_add(len)); + } + FDT_NOP => {} + FDT_END => break, + _ => break, + } + } + } + + fn parse_mem_reserve(&self, info: &mut BootInfo) { + let mut cursor = self.base + 40; + loop { + let Some(address) = read_be_u64_at(cursor) else { + break; + }; + let Some(size) = read_be_u64_at(cursor + 8) else { + break; + }; + cursor += 16; + if address == 0 && size == 0 { + break; + } + let start = address as usize; + let end = start.saturating_add(size as usize); + info.push_reserved_region(start, end); + } + } + + fn string(&self, offset: usize) -> Option<&'static [u8]> { + if offset >= self.size_dt_strings { + return None; + } + let start = self.base + self.off_dt_strings + offset; + let limit = self.base + self.off_dt_strings + self.size_dt_strings; + let mut end = start; + while end < limit && read_u8_at(end) != Some(0) { + end += 1; + } + bytes_at(start, end.saturating_sub(start)) + } +} + +#[derive(Clone, Copy, Default)] +struct NodeState { + parent_is_cpus: bool, + is_cpus: bool, + is_cpu: bool, + is_memory: bool, + is_reserved_memory: bool, + address_cells: usize, + size_cells: usize, + child_address_cells: usize, + child_size_cells: usize, + status_ok: bool, +} + +impl NodeState { + fn for_child(parent: Self, name: &[u8]) -> Self { + let is_cpus = name == b"cpus"; + let is_cpu = parent.is_cpus && starts_with(name, b"cpu@"); + let is_memory = name == b"memory" || starts_with(name, b"memory@"); + let is_reserved_memory = parent.is_reserved_memory || name == b"reserved-memory"; + Self { + parent_is_cpus: parent.is_cpus, + is_cpus, + is_cpu, + is_memory, + is_reserved_memory, + address_cells: parent.child_address_cells.max(1), + size_cells: parent.child_size_cells.max(1), + child_address_cells: 2, + child_size_cells: 1, + status_ok: true, + } + } + + fn apply_property(&mut self, name: &[u8], value: &[u8], info: &mut BootInfo) { + match name { + b"#address-cells" => self.child_address_cells = read_cells_usize(value, 1).unwrap_or(2), + b"#size-cells" => self.child_size_cells = read_cells_usize(value, 1).unwrap_or(1), + b"status" => self.status_ok = value == b"okay\0" || value == b"ok\0" || value.is_empty(), + b"device_type" if self.parent_is_cpus && value == b"cpu\0" => self.is_cpu = true, + b"device_type" if value == b"memory\0" => self.is_memory = true, + b"reg" if self.is_memory && self.status_ok => { + parse_reg(value, self.address_cells, self.size_cells, |start, size| { + info.push_memory_region(start, start.saturating_add(size)); + }); + } + b"reg" if self.is_reserved_memory && !self.is_memory => { + parse_reg(value, self.address_cells, self.size_cells, |start, size| { + info.push_reserved_region(start, start.saturating_add(size)); + }); + } + _ => {} + } + } + + fn finish(&self, info: &mut BootInfo) { + if self.is_cpu && self.status_ok { + info.set_hart_count(info.hart_count.saturating_add(1)); + } + } +} + +fn parse_reg(mut value: &[u8], address_cells: usize, size_cells: usize, mut f: impl FnMut(usize, usize)) { + let stride = (address_cells + size_cells) * 4; + if stride == 0 { + return; + } + while value.len() >= stride { + let Some(start) = read_cells_usize(&value[..address_cells * 4], address_cells) else { + break; + }; + let size_offset = address_cells * 4; + let Some(size) = read_cells_usize(&value[size_offset..size_offset + size_cells * 4], size_cells) else { + break; + }; + if size != 0 { + f(start, size); + } + value = &value[stride..]; + } +} + +fn read_cells_usize(value: &[u8], cells: usize) -> Option { + if cells == 0 || cells > 2 || value.len() < cells * 4 { + return None; + } + let mut out = 0usize; + for cell in 0..cells { + out = (out << 32) | read_be_u32(value.get(cell * 4..cell * 4 + 4)?)? as usize; + } + Some(out) +} + +fn align4(value: usize) -> usize { + (value + 3) & !3 +} + +fn starts_with(value: &[u8], prefix: &[u8]) -> bool { + value.len() >= prefix.len() && &value[..prefix.len()] == prefix +} + +fn bytes_at(addr: usize, len: usize) -> Option<&'static [u8]> { + if addr == 0 { + return None; + } + Some(unsafe { core::slice::from_raw_parts(addr as *const u8, len) }) +} + +fn read_u8_at(addr: usize) -> Option { + if addr == 0 { + return None; + } + Some(unsafe { ptr::read_volatile(addr as *const u8) }) +} + +fn read_be_u32_at(addr: usize) -> Option { + let bytes = [ + read_u8_at(addr)?, + read_u8_at(addr + 1)?, + read_u8_at(addr + 2)?, + read_u8_at(addr + 3)?, + ]; + Some(u32::from_be_bytes(bytes)) +} + +fn read_be_u64_at(addr: usize) -> Option { + let bytes = [ + read_u8_at(addr)?, + read_u8_at(addr + 1)?, + read_u8_at(addr + 2)?, + read_u8_at(addr + 3)?, + read_u8_at(addr + 4)?, + read_u8_at(addr + 5)?, + read_u8_at(addr + 6)?, + read_u8_at(addr + 7)?, + ]; + Some(u64::from_be_bytes(bytes)) +} + +fn read_be_u32(bytes: &[u8]) -> Option { + Some(u32::from_be_bytes(bytes.try_into().ok()?)) +} + +#[cfg(target_arch = "loongarch64")] +mod fw_cfg { + use core::ptr; + + use super::FdtSource; + + const FW_CFG_BASE: usize = 0x1e02_0000; + const FW_CFG_DATA: usize = 0x00; + const FW_CFG_SELECTOR: usize = 0x08; + const FW_CFG_FILE_DIR: u16 = 0x0019; + const FW_CFG_MAX_FILE: usize = 2 * 1024 * 1024; + const FW_CFG_FILE_NAME_LEN: usize = 56; + + static mut FDT_BUF: [u8; FW_CFG_MAX_FILE] = [0; FW_CFG_MAX_FILE]; + + pub(super) fn load_fdt() -> Option { + select(FW_CFG_FILE_DIR); + let count = read_be_u32()? as usize; + for _ in 0..count { + let size = read_be_u32()? as usize; + let select_id = read_be_u16()?; + let _reserved = read_be_u16()?; + let mut name = [0u8; FW_CFG_FILE_NAME_LEN]; + for byte in &mut name { + *byte = read_u8(); + } + if is_name(&name, b"etc/fdt") { + if size == 0 || size > FW_CFG_MAX_FILE { + return None; + } + select(select_id); + let buf = ptr::addr_of_mut!(FDT_BUF) as *mut u8; + for idx in 0..size { + unsafe { + ptr::write_volatile(buf.add(idx), read_u8()); + } + } + return Some(FdtSource { + ptr: buf as usize, + reserve_physical_blob: false, + }); + } + } + None + } + + fn is_name(name: &[u8], expected: &[u8]) -> bool { + let len = name.iter().position(|byte| *byte == 0).unwrap_or(name.len()); + &name[..len] == expected + } + + fn select(selector: u16) { + let selector_addr = crate::platform::mmio_phys_to_virt(FW_CFG_BASE + FW_CFG_SELECTOR); + unsafe { + ptr::write_volatile(selector_addr as *mut u16, selector.to_be()); + } + } + + fn data_addr() -> usize { + crate::platform::mmio_phys_to_virt(FW_CFG_BASE + FW_CFG_DATA) + } + + fn read_u8() -> u8 { + unsafe { ptr::read_volatile(data_addr() as *const u8) } + } + + fn read_be_u16() -> Option { + Some(u16::from_be_bytes([read_u8(), read_u8()])) + } + + fn read_be_u32() -> Option { + Some(u32::from_be_bytes([read_u8(), read_u8(), read_u8(), read_u8()])) + } +} diff --git a/os/src/config.rs b/os/src/config.rs index dcba8ceb..638f5d9d 100644 --- a/os/src/config.rs +++ b/os/src/config.rs @@ -10,8 +10,8 @@ pub const KERNEL_STACK_SIZE: usize = 4096 * 32; pub const MAX_KERNEL_HEAP_SIZE: usize = 0x4000_0000; /// max harts reserved by the kernel SMP bootstrap path pub const MAX_HARTS: usize = 8; -/// QEMU virt 1GiB 内存的物理结束地址,起始地址为 0x8000_0000。 -pub const MEMORY_END: usize = 0xB0000000; +/// Fallback physical memory end used only when firmware memory discovery fails. +pub const MEMORY_END: usize = 0xC0000000; /// page size : 4KB pub const PAGE_SIZE: usize = 0x1000; /// page size bits: 12 diff --git a/os/src/fs/page_cache.rs b/os/src/fs/page_cache.rs index 29b1e695..7be3f069 100644 --- a/os/src/fs/page_cache.rs +++ b/os/src/fs/page_cache.rs @@ -11,10 +11,11 @@ use fs::errno::FS_ERRNO; use fs::Inode; use lazy_static::lazy_static; -use crate::config::{MEMORY_END, PAGE_SIZE}; +use crate::bootinfo; +use crate::config::PAGE_SIZE; use crate::mm::{ frame_alloc, frame_allocator_stats, invalidate_inode_mappings_after_truncate, FrameTracker, - InodeKey, MmError, PhysAddr, PhysPageNum, + InodeKey, MmError, PhysPageNum, }; use crate::syscall::errno::ERRNO; use crate::sync::SpinNoIrqLock; @@ -33,10 +34,6 @@ static WRITEBACK_BATCH_PAGES: AtomicUsize = AtomicUsize::new(0); const MAX_WRITEBACK_BATCH_PAGES: usize = 32; -extern "C" { - fn ekernel(); -} - bitflags! { /// 单个缓存页的状态位。 struct CachePageState: u8 { @@ -1384,9 +1381,13 @@ fn dynamic_watermarks() -> (usize, usize) { /// 初始化时使用的保守水位估算。 fn default_watermarks() -> (usize, usize) { - let managed_start = PhysAddr::from(crate::mm::virt_to_phys(ekernel as usize)).ceil(); - let managed_end = PhysAddr::from(MEMORY_END).floor(); - let total_pages = max(1, managed_end.0.saturating_sub(managed_start.0)); + let mut total_pages = 0usize; + bootinfo::for_each_usable_memory_region(|region| { + let start = region.start.div_ceil(PAGE_SIZE); + let end = region.end / PAGE_SIZE; + total_pages = total_pages.saturating_add(end.saturating_sub(start)); + }); + let total_pages = max(1, total_pages); let high_watermark = max(128, total_pages / 4); let low_watermark = max(96, high_watermark * 3 / 4); (high_watermark, low_watermark) diff --git a/os/src/main.rs b/os/src/main.rs index ac951c38..9b3b5776 100644 --- a/os/src/main.rs +++ b/os/src/main.rs @@ -35,6 +35,7 @@ extern crate alloc; extern crate bitflags; pub mod arch; +pub mod bootinfo; pub mod hal; pub mod platform; @@ -200,6 +201,11 @@ fn init_local_hart(hart_id: usize) { /// Probe the firmware-visible hart IDs and return the number of usable harts. #[cfg(target_arch = "riscv64")] fn detect_hart_count() -> usize { + let fdt_hart_count = bootinfo::hart_count(); + if fdt_hart_count > 1 { + return fdt_hart_count; + } + const SBI_SUCCESS: isize = 0; const SBI_ERR_INVALID_PARAM: isize = -3; @@ -219,7 +225,7 @@ fn detect_hart_count() -> usize { /// Probe the usable hart count on non-RISC-V platforms. #[cfg(target_arch = "loongarch64")] fn detect_hart_count() -> usize { - 1 + bootinfo::hart_count() } /// 竞争并记录负责一次性全局初始化的 bootstrap hart。 @@ -247,9 +253,10 @@ fn wait_for_bootstrap() { } /// bootstrap hart 的主入口 -fn first_hart_main(hart_id: usize) -> ! { +fn first_hart_main(hart_id: usize, fdt_ptr: usize) -> ! { clear_bss(); BOOT_BSS_READY.store(0, Ordering::Release); + bootinfo::init(fdt_ptr); // Install TLB refill handler and page-walker CSRs before activating page tables. trap::init(); mm::init(); @@ -299,11 +306,11 @@ fn secondary_hart_main(hart_id: usize) -> ! { /// /// 第一个进入该入口的 hart 会成为 bootstrap hart,负责一次性全局初始化 /// 并进入调度器;其他 hart 等待 bootstrap 完成后只做本地初始化并进入 idle。 -pub fn rust_main(hart_id: usize) -> ! { +pub fn rust_main(hart_id: usize, fdt_ptr: usize) -> ! { unsafe { crate::hal::init_with_hartid(hart_id) }; if !try_claim_bootstrap_hart(hart_id) { secondary_hart_main(hart_id); } else { - first_hart_main(hart_id); + first_hart_main(hart_id, fdt_ptr); } } diff --git a/os/src/mm/frame_allocator.rs b/os/src/mm/frame_allocator.rs index e062dbc1..5cf90430 100644 --- a/os/src/mm/frame_allocator.rs +++ b/os/src/mm/frame_allocator.rs @@ -1,16 +1,19 @@ //! Physical page frame allocator -use super::{virt_to_phys, PhysAddr, PhysPageNum}; +use super::{virt_to_phys, PhysPageNum}; +use crate::bootinfo::{self, PhysMemoryRegion}; use crate::fs::PAGE_CACHE_MANAGER; use crate::mm::heap_allocator::KERNEL_HEAP_BYTES; -use crate::{config::MEMORY_END, sync::SpinNoIrqLock}; +use crate::sync::SpinNoIrqLock; +use core::cmp::{max, min}; use core::fmt::{self, Debug, Formatter}; +use core::sync::atomic::Ordering; use lazy_static::*; use virtio_drivers::PAGE_SIZE; -use core::sync::atomic::Ordering; const MAX_ORDER: usize = 32; const INVALID_PPN: usize = usize::MAX; +const MAX_MANAGED_REGIONS: usize = 16; /// tracker for physical page frame allocation and deallocation pub struct FrameTracker { @@ -82,19 +85,73 @@ trait FrameAllocator { pub struct BuddyFrameAllocator { start: usize, end: usize, + regions: [PpnRegion; MAX_MANAGED_REGIONS], + region_count: usize, free_list: [Option; MAX_ORDER], free_pages: usize, allocated_pages: usize, } +#[derive(Clone, Copy)] +struct PpnRegion { + start: usize, + end: usize, +} + +impl PpnRegion { + const fn empty() -> Self { + Self { start: 0, end: 0 } + } +} + impl BuddyFrameAllocator { - pub fn init(&mut self, l: PhysPageNum, r: PhysPageNum) { - self.start = l.0; - self.end = r.0; + pub fn init_from_bootinfo(&mut self, kernel_start: PhysPageNum, kernel_end: PhysPageNum) { + self.reset(); + bootinfo::for_each_usable_memory_region(|region| { + self.add_usable_region(region, kernel_start.0, kernel_end.0); + }); + } + + fn reset(&mut self) { + self.start = usize::MAX; + self.end = 0; + self.regions = [PpnRegion::empty(); MAX_MANAGED_REGIONS]; + self.region_count = 0; self.free_list = [None; MAX_ORDER]; self.free_pages = 0; self.allocated_pages = 0; - self.add_range(l.0, r.0); + } + + fn add_usable_region(&mut self, region: PhysMemoryRegion, kernel_start: usize, kernel_end: usize) { + let start = phys_addr_ceil_ppn(region.start); + let end = phys_addr_floor_ppn(region.end); + if start >= end { + return; + } + + if kernel_start < kernel_end { + self.add_managed_range(start, min(end, kernel_start)); + self.add_managed_range(max(start, kernel_end), end); + } else { + self.add_managed_range(start, end); + } + } + + fn add_managed_range(&mut self, start: usize, end: usize) { + if start >= end || self.region_count >= MAX_MANAGED_REGIONS { + return; + } + self.regions[self.region_count] = PpnRegion { start, end }; + self.region_count += 1; + self.start = self.start.min(start); + self.end = self.end.max(end); + self.add_range(start, end); + } + + fn is_managed_range(&self, ppn: usize, pages: usize) -> bool { + self.regions[..self.region_count] + .iter() + .any(|region| ppn >= region.start && ppn.saturating_add(pages) <= region.end) } fn set_next(ppn: usize, next: Option) { @@ -198,11 +255,12 @@ impl BuddyFrameAllocator { } fn dealloc_order(&mut self, ppn: PhysPageNum, order: usize) { + if order >= MAX_ORDER { + panic!("Frame ppn={:#x}, order={} has not been allocated!", ppn.0, order); + } let mut ppn = ppn.0; let pages = 1usize << order; - if order >= MAX_ORDER - || ppn < self.start - || ppn + pages > self.end + if !self.is_managed_range(ppn, pages) || ppn & (pages - 1) != 0 || self.contains_free_block(ppn) { @@ -215,7 +273,7 @@ impl BuddyFrameAllocator { let mut current_order = order; while current_order + 1 < MAX_ORDER { let buddy = ppn ^ (1usize << current_order); - if buddy < self.start || buddy + (1usize << current_order) > self.end { + if !self.is_managed_range(buddy, 1usize << current_order) { break; } if !self.remove_block(current_order, buddy) { @@ -236,6 +294,8 @@ impl FrameAllocator for BuddyFrameAllocator { Self { start: 0, end: 0, + regions: [PpnRegion::empty(); MAX_MANAGED_REGIONS], + region_count: 0, free_list: [None; MAX_ORDER], free_pages: 0, allocated_pages: 0, @@ -276,12 +336,14 @@ lazy_static! { pub fn init_frame_allocator() { extern "C" { + fn skernel(); fn ekernel(); } - FRAME_ALLOCATOR.lock().init( - PhysAddr::from(virt_to_phys(ekernel as usize)).ceil(), - PhysAddr::from(MEMORY_END).floor(), - ); + let kernel_start = PhysPageNum(phys_addr_floor_ppn(virt_to_phys(skernel as usize))); + let kernel_end = PhysPageNum(phys_addr_ceil_ppn(virt_to_phys(ekernel as usize))); + FRAME_ALLOCATOR + .lock() + .init_from_bootinfo(kernel_start, kernel_end); } /// Return runtime statistics of the frame allocator. @@ -298,7 +360,11 @@ pub fn frame_allocator_stats() -> FrameAllocatorStats { /// Allocate a physical page frame in FrameTracker style pub fn frame_alloc() -> Option { - FRAME_ALLOCATOR.lock().alloc().map(FrameTracker::new).or_else(|| { + let ppn = { + let mut allocator = FRAME_ALLOCATOR.lock(); + allocator.alloc() + }; + ppn.map(FrameTracker::new).or_else(|| { let frame_allocator_stats = frame_allocator_stats(); error!( "frame_alloc: out of memory (free={} cached={} low={} high={} total={})", @@ -320,17 +386,20 @@ pub fn frame_dealloc(ppn: PhysPageNum) { /// Allocate a physically contiguous frame range. /// Simplified implmentation: maybe fail when align_pages > pages (require over-alignment) pub fn frame_alloc_contiguous(pages: usize, align_pages: usize) -> Option { - info!("Allocating contiguous frames: pages={}, align_pages={}", pages, align_pages); if pages == 0 || align_pages == 0 || !pages.is_power_of_two() || !align_pages.is_power_of_two() { return None; } let order = pages.trailing_zeros() as usize; - let start = FRAME_ALLOCATOR.lock().alloc_order(order)?; - if start.0 & (align_pages - 1) != 0 { - FRAME_ALLOCATOR.lock().dealloc_order(start, order); - return None; - } + let start = { + let mut allocator = FRAME_ALLOCATOR.lock(); + let start = allocator.alloc_order(order)?; + if start.0 & (align_pages - 1) != 0 { + allocator.dealloc_order(start, order); + return None; + } + start + }; Some(ContiguousFrames::new(start, pages)) } @@ -354,6 +423,14 @@ fn floor_log2(value: usize) -> usize { usize::BITS as usize - 1 - value.leading_zeros() as usize } +fn phys_addr_floor_ppn(pa: usize) -> usize { + pa / PAGE_SIZE +} + +fn phys_addr_ceil_ppn(pa: usize) -> usize { + pa.saturating_add(PAGE_SIZE - 1) / PAGE_SIZE +} + #[allow(unused)] pub fn frame_allocator_test() { use alloc::vec::Vec; diff --git a/os/src/mm/heap_allocator.rs b/os/src/mm/heap_allocator.rs index b3d10776..7d78fdbf 100644 --- a/os/src/mm/heap_allocator.rs +++ b/os/src/mm/heap_allocator.rs @@ -2,9 +2,7 @@ use super::frame_allocator::{frame_alloc, frame_alloc_contiguous, frame_dealloc}; use super::{phys_to_virt, PageTableEntry, PhysPageNum, PTEFlags, VirtAddr, KERNEL_SPACE}; -use crate::config::{ - KERNEL_HEAP_BASE, MAX_KERNEL_HEAP_SIZE, MEMORY_END, PAGE_SIZE, PAGE_SIZE_BITS, -}; +use crate::config::{KERNEL_HEAP_BASE, MAX_KERNEL_HEAP_SIZE, PAGE_SIZE, PAGE_SIZE_BITS}; use crate::sync::SpinNoIrqLock; use buddy_system_allocator::LockedHeap; use core::alloc::{GlobalAlloc, Layout}; @@ -437,16 +435,14 @@ pub fn heap_test() { extern "C" { fn sbss(); fn ebss(); - fn ekernel(); } let bss_range = sbss as usize..ebss as usize; - let bootstrap_frame_backed_range = ekernel as usize..MEMORY_END; let virtual_heap_range = KERNEL_HEAP_BASE..KERNEL_HEAP_BASE + MAX_KERNEL_HEAP_SIZE; let a = Box::new(5); assert_eq!(*a, 5); let a_ptr = a.as_ref() as *const _ as usize; assert!(!bss_range.contains(&a_ptr)); - assert!(bootstrap_frame_backed_range.contains(&a_ptr) || virtual_heap_range.contains(&a_ptr)); + assert!(is_bootinfo_ram_va(a_ptr) || virtual_heap_range.contains(&a_ptr)); drop(a); let mut v: Vec = Vec::new(); for i in 0..500 { @@ -457,7 +453,17 @@ pub fn heap_test() { } let v_ptr = v.as_ptr() as usize; assert!(!bss_range.contains(&v_ptr)); - assert!(bootstrap_frame_backed_range.contains(&v_ptr) || virtual_heap_range.contains(&v_ptr)); + assert!(is_bootinfo_ram_va(v_ptr) || virtual_heap_range.contains(&v_ptr)); drop(v); println!("heap_test passed!"); } + +#[allow(unused)] +fn is_bootinfo_ram_va(va: usize) -> bool { + let pa = crate::platform::direct_map_virt_to_phys(va); + let mut found = false; + crate::bootinfo::for_each_usable_memory_region(|region| { + found |= pa >= region.start && pa < region.end; + }); + found +} diff --git a/os/src/mm/memory_set.rs b/os/src/mm/memory_set.rs index 3a857ff5..38ae0587 100644 --- a/os/src/mm/memory_set.rs +++ b/os/src/mm/memory_set.rs @@ -4,8 +4,9 @@ use super::{frame_alloc, shootdown, FrameTracker, MmError, PageFaultHandled, Sho use super::{PageTable, PageTableEntry, PTEFlags}; use super::{PhysAddr, PhysPageNum, USER_SPACE_END, VirtAddr, VirtPageNum}; use super::{StepByOne, VPNRange}; +use crate::bootinfo; use crate::config::{ - MEMORY_END, MMIO, PAGE_SIZE, TRAMPOLINE, USER_MMAP_BASE, USER_PIE_BASE, USER_STACK_BASE, USER_STACK_SIZE, + MMIO, PAGE_SIZE, TRAMPOLINE, USER_MMAP_BASE, USER_PIE_BASE, USER_STACK_BASE, USER_STACK_SIZE, USER_VDSO_BASE, }; use crate::fs::{ @@ -35,6 +36,7 @@ extern "C" { fn edata(); fn sbss_with_stack(); fn ebss(); + fn skernel(); fn ekernel(); fn strampoline(); } @@ -151,6 +153,32 @@ fn align_up(value: usize, align: usize) -> Option { value.checked_add(align - 1).map(|v| v & !(align - 1)) } +fn align_up_to_page(value: usize) -> usize { + align_up(value, PAGE_SIZE).unwrap_or(usize::MAX) +} + +fn align_down_to_page(value: usize) -> usize { + value & !(PAGE_SIZE - 1) +} + +fn map_kernel_ram_fragment(memory_set: &mut MemorySet, start: usize, end: usize) { + if start >= end { + return; + } + memory_set + .insert_vma( + Vma::new( + start.into(), + end.into(), + MapType::Identical, + MapPermission::R | MapPermission::W, + VmaKind::Kernel, + ), + None, + ) + .expect("failed to map physical memory window"); +} + fn format_hex_bytes(bytes: &[u8]) -> String { let mut out = String::new(); for (idx, byte) in bytes.iter().enumerate() { @@ -880,18 +908,14 @@ impl MemorySet { ) .expect("failed to map kernel bss"); info!("mapping physical memory"); - memory_set - .insert_vma( - Vma::new( - (ekernel as usize).into(), - MEMORY_END.into(), - MapType::Identical, - MapPermission::R | MapPermission::W, - VmaKind::Kernel, - ), - None, - ) - .expect("failed to map physical memory window"); + let kernel_start = skernel as usize; + let kernel_end = ekernel as usize; + bootinfo::for_each_usable_memory_region(|region| { + let start = align_up_to_page(region.start); + let end = align_down_to_page(region.end); + map_kernel_ram_fragment(&mut memory_set, start, kernel_start.min(end)); + map_kernel_ram_fragment(&mut memory_set, kernel_end.max(start), end); + }); info!("mapping memory-mapped registers"); for pair in MMIO { memory_set diff --git a/os/src/platform/loongarch/qemu_virt/mod.rs b/os/src/platform/loongarch/qemu_virt/mod.rs index acd4624d..f1671af7 100644 --- a/os/src/platform/loongarch/qemu_virt/mod.rs +++ b/os/src/platform/loongarch/qemu_virt/mod.rs @@ -165,8 +165,14 @@ pub fn start_secondary_harts(bootstrap_hart_id: usize) { // physical address would drop APs outside the cached DMW window right // after wakeup. let entry = _start as usize; + let boot_info = crate::bootinfo::get(); + let hart_count = if boot_info.fdt_blob().is_some() { + boot_info.hart_count() + } else { + crate::config::MAX_HARTS + }; enable_ipi(); - for hart_id in 0..crate::config::MAX_HARTS { + for hart_id in 0..hart_count.min(crate::config::MAX_HARTS) { if hart_id == bootstrap_hart_id { continue; } From 960785eb5370117c4c18fdece56767bc4bfff898 Mon Sep 17 00:00:00 2001 From: Kyle Date: Wed, 17 Jun 2026 13:00:25 +0800 Subject: [PATCH 4/4] chore: Update `CosmOS-rootfs` submodule. --- CosmOS-rootfs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CosmOS-rootfs b/CosmOS-rootfs index 57eb5cca..3a97e36f 160000 --- a/CosmOS-rootfs +++ b/CosmOS-rootfs @@ -1 +1 @@ -Subproject commit 57eb5ccaed0aa2b9d5bff83b48d2220238bae213 +Subproject commit 3a97e36fad94eea0055c5d4cd3d9bcfbd73462fb