diff --git a/awkernel_lib/Cargo.toml b/awkernel_lib/Cargo.toml
index aeef0e19f..873ead8f8 100644
--- a/awkernel_lib/Cargo.toml
+++ b/awkernel_lib/Cargo.toml
@@ -13,6 +13,7 @@ array-macro = "2.1"
 embedded-graphics-core = "0.4"
 embedded-graphics = "0.8"
 chrono = { version = "0.4", default-features = false, features = ["clock"], optional = true }
+fdt = { version = "0.1", optional = true }
 
 [dependencies.awkernel_sync]
 git = "https://github.com/tier4/awkernel_sync.git"
@@ -82,7 +83,7 @@ x86 = [
 ]
 aarch64 = ["dep:awkernel_aarch64", "awkernel_sync/aarch64"]
 rv32 = ["dep:riscv", "awkernel_sync/rv32"]
-rv64 = ["awkernel_sync/rv64"]
+rv64 = ["awkernel_sync/rv64", "dep:fdt"]
 std = ["dep:libc", "dep:socket2", "awkernel_sync/std"]
 spinlock = ["awkernel_sync/spinlock"]
 # LFN (Long File Name) support
diff --git a/awkernel_lib/src/arch/rv64.rs b/awkernel_lib/src/arch/rv64.rs
index e23965343..8476abb98 100644
--- a/awkernel_lib/src/arch/rv64.rs
+++ b/awkernel_lib/src/arch/rv64.rs
@@ -3,16 +3,35 @@ pub mod barrier;
 pub(super) mod cpu;
 pub(super) mod delay;
 pub(super) mod dvfs;
+pub(super) mod fdt;
 pub(super) mod frame_allocator;
 pub(super) mod interrupt;
 pub(super) mod page_table;
 pub(super) mod paging;
 pub(super) mod vm;
 
+use core::sync::atomic::{AtomicUsize, Ordering};
+
 pub struct RV64;
 
 impl super::Arch for RV64 {}
 
+/// # Safety
+///
+/// This function must be called at initialization,
+/// and called by the primary CPU.
+pub unsafe fn init_primary() {
+    delay::init_primary();
+}
+
+/// # Safety
+///
+/// This function must be called at initialization,
+/// and called by non-primary CPUs.
+pub unsafe fn init_non_primary() {
+    delay::init_non_primary();
+}
+
 pub fn init_page_allocator() {
     frame_allocator::init_page_allocator();
 }
@@ -39,3 +58,117 @@ pub fn translate_kernel_address(vpn: address::VirtPageNum) -> Option<page_table:
         None
     }
 }
+
+pub fn get_heap_size() -> usize {
+    vm::get_heap_size()
+}
+
+static BOOT_DTB_PTR: AtomicUsize = AtomicUsize::new(0);
+
+/// Record the DTB pointer supplied by firmware/bootloader.
+pub fn set_boot_dtb_ptr(addr: usize) {
+    BOOT_DTB_PTR.store(addr, Ordering::Relaxed);
+}
+
+fn boot_dtb_ptr() -> Option<usize> {
+    match BOOT_DTB_PTR.load(Ordering::Relaxed) {
+        0 => None,
+        addr => Some(addr),
+    }
+}
+
+/// Detect memory size from device tree or by probing
+///
+/// Returns the detected memory size in bytes, or None if detection fails
+pub fn detect_memory_size() -> Option<u64> {
+    unsafe {
+        if let Some(dtb_addr) = boot_dtb_ptr() {
+            if let Some(region) = fdt::detect_memory_from_dtb(dtb_addr) {
+                unsafe_puts("Memory detected from boot DTB pointer at 0x");
+                crate::console::unsafe_print_hex_u32((dtb_addr >> 16) as u32);
+                crate::console::unsafe_print_hex_u32(dtb_addr as u32);
+                unsafe_puts("\r\n");
+                return Some(region.size);
+            } else {
+                unsafe_puts("Boot DTB pointer invalid, falling back to probe\r\n");
+            }
+        }
+
+        // First, try to find DTB at common locations
+        if let Some(dtb_addr) = fdt::probe_dtb_locations() {
+            if let Some(region) = fdt::detect_memory_from_dtb(dtb_addr) {
+                unsafe_puts("Memory detected from DTB at 0x");
+                crate::console::unsafe_print_hex_u32((dtb_addr >> 16) as u32);
+                crate::console::unsafe_print_hex_u32(dtb_addr as u32);
+                unsafe_puts("\r\n");
+                return Some(region.size);
+            }
+        }
+
+        // Fallback: Try memory probing
+        if let Some(size) = fdt::probe_memory_size() {
+            use crate::console::unsafe_puts;
+            unsafe_puts("Memory detected by probing\r\n");
+            return Some(size);
+        }
+    }
+    None
+}
+
+use crate::console::unsafe_puts;
+
+/// Get the memory end address based on detected memory size
+///
+/// This function attempts to detect the actual available RAM and returns
+/// an appropriate MEMORY_END value. Panics if detection fails.
+pub fn get_memory_end() -> u64 {
+    const DRAM_BASE: u64 = 0x80000000;
+
+    if let Some(detected_size) = detect_memory_size() {
+        // Use detected size directly
+        let memory_end = DRAM_BASE + detected_size;
+
+        // Log the detection
+        unsafe {
+            use crate::console::unsafe_puts;
+            unsafe_puts("Detected RAM size: 0x");
+            crate::console::unsafe_print_hex_u32((detected_size >> 32) as u32);
+            crate::console::unsafe_print_hex_u32(detected_size as u32);
+            unsafe_puts("\r\n");
+        }
+
+        memory_end
+    } else {
+        // Failed to detect memory - cannot continue boot safely
+        panic!("Failed to detect memory size from DTB or probing - cannot boot");
+    }
+}
+
+/// Detect CPU count from device tree
+///
+/// Returns the detected number of CPUs. Panics if detection fails.
+pub fn detect_cpu_count() -> usize {
+    unsafe {
+        if let Some(dtb_addr) = boot_dtb_ptr() {
+            if let Some(count) = fdt::detect_cpu_count_from_dtb(dtb_addr) {
+                unsafe_puts("Detected ");
+                crate::console::unsafe_print_hex_u32(count as u32);
+                unsafe_puts(" CPUs from boot DTB\r\n");
+                return count;
+            }
+        }
+
+        // Try to find DTB at common locations
+        if let Some(dtb_addr) = fdt::probe_dtb_locations() {
+            if let Some(count) = fdt::detect_cpu_count_from_dtb(dtb_addr) {
+                unsafe_puts("Detected ");
+                crate::console::unsafe_print_hex_u32(count as u32);
+                unsafe_puts(" CPUs from probed DTB\r\n");
+                return count;
+            }
+        }
+
+        // Failed to detect CPU count - cannot continue boot safely
+        panic!("Failed to detect CPU count from DTB - cannot boot");
+    }
+}
diff --git a/awkernel_lib/src/arch/rv64/address.rs b/awkernel_lib/src/arch/rv64/address.rs
index cf896ca1b..2a0e00f1b 100644
--- a/awkernel_lib/src/arch/rv64/address.rs
+++ b/awkernel_lib/src/arch/rv64/address.rs
@@ -8,7 +8,11 @@ use core::{
 pub const PAGE_SIZE: usize = 0x1000; // 4 KiB
 pub const PAGE_OFFSET: usize = 0xc; // 12 bits
 
-pub const MEMORY_END: u64 = 0x8080_0000;
+// Memory end for QEMU virt with 1GB RAM (-m 1G)
+// QEMU provides 1GB at 0x80000000 - 0xC0000000
+// Using 0xC0000000 directly may be too aggressive,
+// so we use 0xB0000000 (768MB from start) for safety
+pub const MEMORY_END: u64 = 0xB0000000;
 
 /// Design abstraction struct:
 /// PhysAddr, VirtAddr, PhysPageNum, VirtPageNum
diff --git a/awkernel_lib/src/arch/rv64/cpu.rs b/awkernel_lib/src/arch/rv64/cpu.rs
index c579ddeab..309cdf978 100644
--- a/awkernel_lib/src/arch/rv64/cpu.rs
+++ b/awkernel_lib/src/arch/rv64/cpu.rs
@@ -4,7 +4,9 @@ impl CPU for super::RV64 {
     fn cpu_id() -> usize {
         let hartid: usize;
         unsafe {
-            core::arch::asm!("csrr {}, mhartid", out(reg) hartid);
+            // In M-mode, read from tp register (set during boot)
+            // This avoids repeatedly accessing mhartid CSR
+            core::arch::asm!("mv {}, tp", out(reg) hartid);
         }
         hartid
     }
diff --git a/awkernel_lib/src/arch/rv64/delay.rs b/awkernel_lib/src/arch/rv64/delay.rs
index 74aa52711..f55c4f3ae 100644
--- a/awkernel_lib/src/arch/rv64/delay.rs
+++ b/awkernel_lib/src/arch/rv64/delay.rs
@@ -1,9 +1,12 @@
 use crate::delay::Delay;
+use core::sync::atomic::{AtomicU64, Ordering};
 
 // we should get this info from device tree
 const ACLINT_MTIME_BASE: u32 = 0x0200_0000 + 0x0000bff8;
 const RISCV_TIMEBASE_FREQ: u64 = 10_000_000;
 
+static COUNT_START: AtomicU64 = AtomicU64::new(0);
+
 impl Delay for super::RV64 {
     fn wait_interrupt() {
         unsafe { core::arch::asm!("wfi") };
@@ -16,19 +19,19 @@ impl Delay for super::RV64 {
     }
 
     fn uptime() -> u64 {
-        // as microsec
-        unsafe {
-            let mtime = ACLINT_MTIME_BASE as *const u64;
-            *mtime * 1_000_000 / RISCV_TIMEBASE_FREQ
-        }
+        let start = COUNT_START.load(Ordering::Acquire);
+        let mtime = ACLINT_MTIME_BASE as *const u64;
+        let now = unsafe { core::ptr::read_volatile(mtime) };
+        let diff = now - start;
+        diff * 1_000_000 / RISCV_TIMEBASE_FREQ
     }
 
     fn uptime_nano() -> u128 {
-        // as microsec
-        unsafe {
-            let mtime = ACLINT_MTIME_BASE as *const u128;
-            *mtime * 1_000_000_000 / RISCV_TIMEBASE_FREQ as u128
-        }
+        let start = COUNT_START.load(Ordering::Acquire);
+        let mtime = ACLINT_MTIME_BASE as *const u64;
+        let now = unsafe { core::ptr::read_volatile(mtime) };
+        let diff = now - start;
+        diff as u128 * 1_000_000_000 / RISCV_TIMEBASE_FREQ as u128
     }
 
     fn cpu_counter() -> u64 {
@@ -39,3 +42,13 @@ impl Delay for super::RV64 {
         cycle
     }
 }
+
+pub(super) unsafe fn init_primary() {
+    let mtime = ACLINT_MTIME_BASE as *const u64;
+    let count = core::ptr::read_volatile(mtime);
+    COUNT_START.store(count, Ordering::Release);
+}
+
+pub(super) unsafe fn init_non_primary() {
+    // No additional initialization needed for non-primary CPUs
+}
diff --git a/awkernel_lib/src/arch/rv64/fdt.rs b/awkernel_lib/src/arch/rv64/fdt.rs
new file mode 100644
index 000000000..70e7b4b87
--- /dev/null
+++ b/awkernel_lib/src/arch/rv64/fdt.rs
@@ -0,0 +1,157 @@
+//! Simple Flattened Device Tree (FDT) helpers for memory detection
+//!
+//! This module now leverages the `fdt` crate to extract memory information
+//! while keeping a small, RV64-centric surface for the rest of the kernel.
+
+use fdt::Fdt;
+
+/// FDT Magic number (0xd00dfeed in big-endian)
+const FDT_MAGIC: u32 = 0xd00dfeed;
+
+const RISCV_DRAM_BASE: u64 = 0x8000_0000;
+const RISCV_DRAM_MAX_BASE: u64 = 0x9000_0000;
+const MAX_REASONABLE_REGION_SIZE: u64 = 0x1_0000_0000; // 4GB
+
+/// Memory region information
+#[derive(Debug, Clone, Copy)]
+pub struct MemoryRegion {
+    pub base: u64,
+    pub size: u64,
+}
+
+/// Detect memory size from device tree blob
+///
+/// # Safety
+///
+/// This function reads from the DTB address which must point to valid memory.
+pub unsafe fn detect_memory_from_dtb(dtb_addr: usize) -> Option<MemoryRegion> {
+    // Check if DTB address looks valid (aligned and non-zero)
+    if dtb_addr == 0 || dtb_addr & 0x3 != 0 {
+        return None;
+    }
+
+    let fdt = unsafe { Fdt::from_ptr(dtb_addr as *const u8).ok()? };
+    let memory = fdt.memory();
+    let mut regions = memory.regions().filter_map(convert_region);
+
+    regions.find(is_reasonable_region)
+}
+
+fn convert_region(region: fdt::standard_nodes::MemoryRegion) -> Option<MemoryRegion> {
+    let size = region.size? as u64;
+    if size == 0 {
+        return None;
+    }
+
+    Some(MemoryRegion {
+        base: region.starting_address as usize as u64,
+        size,
+    })
+}
+
+fn is_reasonable_region(region: &MemoryRegion) -> bool {
+    region.base >= RISCV_DRAM_BASE
+        && region.base < RISCV_DRAM_MAX_BASE
+        && region.size > 0
+        && region.size < MAX_REASONABLE_REGION_SIZE
+}
+
+/// Probe common DTB locations used by QEMU and firmware
+pub unsafe fn probe_dtb_locations() -> Option<usize> {
+    // Common DTB locations for RISC-V:
+    // 1. 0x82000000 - QEMU often places DTB here
+    // 2. 0x88000000 - Alternative location
+    // 3. 0x87000000 - Another common location
+    // 4. 0xBFE00000 - End of 1GB RAM (QEMU default for some modes)
+
+    let locations = [
+        0xBFE00000, // Try end of 1GB first (QEMU -bios none)
+        0x82000000, 0x88000000, 0x87000000, 0xBFF00000, // Alternative end location
+    ];
+
+    for &addr in &locations {
+        // Check if this looks like a valid DTB
+        let magic_ptr = addr as *const u32;
+        if let Some(magic) = unsafe { magic_ptr.as_ref() } {
+            if u32::from_be(*magic) == FDT_MAGIC {
+                return Some(addr);
+            }
+        }
+    }
+
+    None
+}
+
+/// Probe available memory by checking if addresses are accessible
+///
+/// This is a fallback when DTB is not available. We probe memory
+/// in chunks to find where RAM ends.
+pub unsafe fn probe_memory_size() -> Option<u64> {
+    const DRAM_BASE: usize = 0x80000000;
+    const PROBE_STEP: usize = 64 * 1024 * 1024; // 64MB chunks
+    const MAX_PROBE: usize = 2 * 1024 * 1024 * 1024; // Don't probe beyond 2GB
+
+    let mut current = DRAM_BASE + PROBE_STEP;
+    let mut last_valid = DRAM_BASE;
+
+    while current < DRAM_BASE + MAX_PROBE {
+        // Use read-write-verify test pattern for reliable memory detection
+        // This is the standard approach to distinguish valid RAM from MMIO/ROM/unmapped regions
+        let test_addr = current as *mut u32;
+
+        if let Some(ptr) = test_addr.as_mut() {
+            // Save the original value
+            let original = core::ptr::read_volatile(ptr);
+
+            // Write alternating bit pattern (industry standard memory test pattern)
+            core::ptr::write_volatile(ptr, 0xA5A5A5A5);
+
+            // Read back and verify
+            let test_read = core::ptr::read_volatile(ptr);
+
+            // Restore original value to avoid corrupting memory
+            core::ptr::write_volatile(ptr, original);
+
+            // If write was successful, this is valid writable RAM
+            if test_read == 0xA5A5A5A5 {
+                last_valid = current;
+            } else {
+                // Write failed - likely MMIO, ROM, or invalid address
+                break;
+            }
+        } else {
+            break;
+        }
+
+        current += PROBE_STEP;
+    }
+
+    if last_valid > DRAM_BASE {
+        Some((last_valid - DRAM_BASE) as u64)
+    } else {
+        None
+    }
+}
+
+/// Detect the number of CPUs from the device tree blob
+///
+/// # Safety
+///
+/// This function reads from the DTB address which must point to valid memory.
+pub unsafe fn detect_cpu_count_from_dtb(dtb_addr: usize) -> Option<usize> {
+    // Check if DTB address looks valid (aligned and non-zero)
+    if dtb_addr == 0 || dtb_addr & 0x3 != 0 {
+        return None;
+    }
+
+    let fdt = unsafe { Fdt::from_ptr(dtb_addr as *const u8).ok()? };
+
+    // Count CPUs by iterating through /cpus node
+    let cpu_count = fdt.cpus().count();
+
+    if cpu_count > 0 {
+        Some(cpu_count)
+    } else {
+        None
+    }
+}
diff --git a/awkernel_lib/src/arch/rv64/interrupt.rs b/awkernel_lib/src/arch/rv64/interrupt.rs
index 72ade36d8..f82576645 100644
--- a/awkernel_lib/src/arch/rv64/interrupt.rs
+++ b/awkernel_lib/src/arch/rv64/interrupt.rs
@@ -1,5 +1,7 @@
 use crate::interrupt::Interrupt;
 
+// RV64 runs in M-mode without OpenSBI, so we use mstatus
+// MIE (Machine Interrupt Enable) is bit 3 (0x08) in mstatus
 impl Interrupt for super::RV64 {
     fn get_flag() -> usize {
         let x: usize;
diff --git a/awkernel_lib/src/arch/rv64/vm.rs b/awkernel_lib/src/arch/rv64/vm.rs
index ed2d32de8..bf9797691 100644
--- a/awkernel_lib/src/arch/rv64/vm.rs
+++ b/awkernel_lib/src/arch/rv64/vm.rs
@@ -5,7 +5,9 @@ use crate::addr::{virt_addr::VirtAddr, Addr};
 use crate::{console::unsafe_puts, paging::PAGESIZE};
 use alloc::vec::Vec;
 use core::arch::asm;
+use core::sync::atomic::{AtomicUsize, Ordering};
 
+#[allow(dead_code)]
 extern "C" {
     fn stext();
     fn etext();
@@ -153,6 +155,16 @@ impl MemorySet {
         }
     }
 
+    pub fn get_heap_size(&self) -> Option<usize> {
+        // Return heap size if calculated during init_kernel_heap
+        let size = HEAP_SIZE.load(core::sync::atomic::Ordering::Relaxed);
+        if size == 0 {
+            None
+        } else {
+            Some(size)
+        }
+    }
+
     #[allow(dead_code)]
     pub fn token(&self) -> usize {
         self.page_table.token()
@@ -261,6 +273,7 @@ impl MemorySet {
             None,
         );
 
+        // Note: Heap mapping is created separately in calculate_dynamic_heap_size()
         memory_set
     }
 
@@ -271,7 +284,7 @@ impl MemorySet {
             asm!("sfence.vma");
         }
     }
-    #[allow(dead_code)]
+
     pub fn translate(&mut self, vpn: VirtPageNum) -> Option<PageTableEntry> {
         self.page_table.translate(vpn)
     }
@@ -300,16 +313,84 @@ impl MapArea {
     }
 }
 
-// Static kernel memory set
+// Static kernel memory set and heap size tracking
 use crate::sync::mcs::MCSNode;
 use crate::sync::mutex::Mutex;
 
 pub static KERNEL_SPACE: Mutex<Option<MemorySet>> = Mutex::new(None);
+static HEAP_SIZE: AtomicUsize = AtomicUsize::new(0);
 
 pub fn init_kernel_space() {
     let mut node = MCSNode::new();
     let mut kernel_space = KERNEL_SPACE.lock(&mut node);
-    *kernel_space = Some(MemorySet::new_kernel());
+    let mut memory_set = MemorySet::new_kernel();
+
+    // Calculate dynamic heap size after kernel mapping
+    let heap_size = calculate_dynamic_heap_size(&mut memory_set);
+    HEAP_SIZE.store(heap_size, Ordering::Relaxed);
+
+    *kernel_space = Some(memory_set);
+}
+
+fn calculate_dynamic_heap_size(memory_set: &mut MemorySet) -> usize {
+    extern "C" {
+        fn ekernel();
+    }
+    use crate::addr::{phy_addr::PhyAddr, Addr};
+
+    // Get dynamically detected memory end
+    let memory_end_addr = super::get_memory_end();
+
+    // Calculate available memory after kernel
+    let kernel_end = PhyAddr::from_usize(ekernel as *const () as usize);
+    let memory_end = PhyAddr::from_usize(memory_end_addr as usize);
+
+    // Start heap mapping after kernel, aligned to page boundary
+    let heap_start_phy =
+        PhyAddr::from_usize((kernel_end.as_usize() + PAGESIZE - 1) & !(PAGESIZE - 1));
+
+    if memory_end <= heap_start_phy {
+        unsafe {
+            unsafe_puts("Warning: No memory available for heap\r\n");
+        }
+        return 0;
+    }
+
+    // Calculate heap region
+    let heap_start = heap_start_phy.as_usize();
+    let heap_end = memory_end.as_usize();
+    let heap_size = heap_end - heap_start;
+
+    unsafe {
+        unsafe_puts("Dynamic heap calculation:\r\n");
+        unsafe_puts("  Kernel end: 0x");
+        crate::console::unsafe_print_hex_u64(kernel_end.as_usize() as u64);
+        unsafe_puts("\r\n  Heap start: 0x");
+        crate::console::unsafe_print_hex_u64(heap_start as u64);
+        unsafe_puts("\r\n  Heap end: 0x");
+        crate::console::unsafe_print_hex_u64(heap_end as u64);
+        unsafe_puts("\r\n  Heap size: 0x");
+        crate::console::unsafe_print_hex_u64(heap_size as u64);
+        unsafe_puts("\r\n");
+        unsafe_puts("Creating dedicated heap mapping...\r\n");
+    }
+
+    // Create dedicated heap mapping using memory_set
+    memory_set.push(
+        MapArea::new(
+            VirtAddr::from_usize(heap_start),
+            VirtAddr::from_usize(heap_end),
+            MapType::Identical,
+            MapPermission::R | MapPermission::W,
+        ),
+        None,
+    );
+
+    unsafe {
+        unsafe_puts("Heap mapping created successfully\r\n");
+    }
+
+    heap_size
 }
 
 pub fn activate_kernel_space() {
@@ -319,7 +400,7 @@ pub fn activate_kernel_space() {
         kernel_space.activate();
     }
 }
-#[allow(dead_code)]
+
 pub fn kernel_token() -> usize {
     let mut node = MCSNode::new();
     let kernel_space = KERNEL_SPACE.lock(&mut node);
@@ -329,3 +410,37 @@ pub fn kernel_token() -> usize {
         panic!("Kernel space not initialized")
     }
 }
+
+pub fn get_heap_size() -> usize {
+    // Try to get heap size from kernel space MemorySet first
+    let mut node = MCSNode::new();
+    let kernel_space = KERNEL_SPACE.lock(&mut node);
+    if let Some(ref space) = *kernel_space {
+        if let Some(size) = space.get_heap_size() {
+            return size;
+        }
+    }
+
+    // Fallback: read from HEAP_SIZE atomic if not in MemorySet
+    let heap_size = HEAP_SIZE.load(Ordering::Relaxed);
+    if heap_size == 0 {
+        // Last resort: calculate from memory end
+        extern "C" {
+            fn ekernel();
+        }
+        use crate::addr::{phy_addr::PhyAddr, Addr};
+
+        let memory_end_addr = super::get_memory_end();
+
+        let kernel_end = PhyAddr::from_usize(ekernel as *const () as usize);
+        let memory_end = PhyAddr::from_usize(memory_end_addr as usize);
+
+        if memory_end > kernel_end {
+            memory_end.as_usize() - kernel_end.as_usize()
+        } else {
+            0
+        }
+    } else {
+        heap_size
+    }
+}
diff --git a/awkernel_lib/src/interrupt.rs b/awkernel_lib/src/interrupt.rs
index 12aaf798a..ae2fdce4e 100644
--- a/awkernel_lib/src/interrupt.rs
+++ b/awkernel_lib/src/interrupt.rs
@@ -403,6 +403,57 @@ pub fn handle_preemption() {
     preemption();
 }
 
+/// Handle all pending interrupt requests for RISC-V.
+/// This is called from the M-mode interrupt handler.
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+pub fn handle_irqs(is_task: bool) {
+    use crate::{heap, unwind::catch_unwind};
+    use core::mem::transmute;
+
+    let handlers = IRQ_HANDLERS.read();
+    let mut need_preemption = false;
+
+    let controller = INTERRUPT_CONTROLLER.read();
+    if let Some(ctrl) = controller.as_ref() {
+        let iter = ctrl.pending_irqs();
+        drop(controller); // unlock
+
+        // Use the primary allocator.
+        #[cfg(not(feature = "std"))]
+        let _guard = {
+            let g = heap::TALLOC.save();
+            unsafe { heap::TALLOC.use_primary() };
+            g
+        };
+
+        for irq in iter {
+            if irq == PREEMPT_IRQ.load(Ordering::Relaxed) {
+                need_preemption = true;
+                continue;
+            }
+
+            if let Some((_, handler)) = handlers.get(&irq) {
+                if let Err(err) = catch_unwind(|| {
+                    handler(irq);
+                }) {
+                    log::warn!("an interrupt handler has been panicked\n{err:?}");
+                }
+            }
+        }
+    }
+
+    if need_preemption && is_task {
+        let ptr = PREEMPT_FN.load(Ordering::Relaxed);
+        let preemption = unsafe { transmute::<*mut (), fn()>(ptr) };
+        preemption();
+    }
+
+    // Because IPIs are edge-trigger,
+    // IPI sent during interrupt handlers will be ignored.
+    // To notify the interrupt again, we setup a timer.
+    crate::cpu::reset_wakeup_timer();
+}
+
 /// Disable interrupts and automatically restored the configuration.
 ///
 /// ```
diff --git a/kernel/ld/rv64-link.lds b/kernel/ld/rv64-link.lds
index c85dd2016..19d184e13 100644
--- a/kernel/ld/rv64-link.lds
+++ b/kernel/ld/rv64-link.lds
@@ -3,10 +3,11 @@ ENTRY(_start)
 
 SECTIONS
 {
-    . = 0x80200000;
+    /* Boot code at DRAM start for -bios none */
+    . = 0x80000000;
     PROVIDE (__executable_start = .);
     __ram_start = .;
-    
+
     PROVIDE (stext = .);
     .init : { KEEP(*(.init)) }
     .text : { *(.text .text.* .gnu.linkonce.t*) }
diff --git a/kernel/src/arch/rv64.rs b/kernel/src/arch/rv64.rs
index 58902c929..c2b8f9464 100644
--- a/kernel/src/arch/rv64.rs
+++ b/kernel/src/arch/rv64.rs
@@ -1,3 +1,5 @@
 pub mod config;
 mod console;
+mod interrupt_controller;
 mod kernel_main;
+mod timer;
diff --git a/kernel/src/arch/rv64/boot.S b/kernel/src/arch/rv64/boot.S
index 8e3095ac4..3740c2482 100644
--- a/kernel/src/arch/rv64/boot.S
+++ b/kernel/src/arch/rv64/boot.S
@@ -7,13 +7,28 @@
   .attribute arch, "rv64gc"
 
 _start:
+  # Read hart ID from mhartid CSR (M-mode)
+  csrr a0, mhartid
+  mv tp, a0    # save hartid to thread pointer for later use
+
+  # Preserve DTB pointer that QEMU/firmware passes in a1
+  la t0, dtb_ptr
+  sd a1, 0(t0)
+
+  # set up a simple trap handler for M-mode
+  la t0, early_trap_handler
+  csrw mtvec, t0
+
+  # Initialize mscratch to 0 (will be set properly later if needed)
+  csrw mscratch, zero
+
   # set up the 8MB initial stack for each cpu.
   li sp, 0x80800000
-  li a0, 0x00800000
-  csrr a1, mhartid
-  addi a1, a1, 1
-  mul a0, a0, a1
-  add sp, sp, a0
+  li t0, 0x00800000
+  mv t1, a0    # use hartid
+  addi t1, t1, 1
+  mul t0, t0, t1
+  add sp, sp, t0
   # clear the BSS
   la t0, __bss_start
   la t1, __bss_end
@@ -24,4 +39,203 @@ _start:
   # jump to kernel_main
   jal ra, kernel_main
 2:
-  j 2b
\ No newline at end of file
+  j 2b
+
+# M-mode trap handler - handles interrupts and exceptions
+.align 4
+early_trap_handler:
+  # Check if this is an interrupt or exception
+  csrr t0, mcause
+  blt t0, zero, handle_interrupt  # If MSB is set, it's an interrupt
+
+  # This is an exception - print debug info and halt
+  j unhandled_trap
+
+handle_interrupt:
+  # Get interrupt code (lower bits of mcause)
+  li t1, 0x7FFFFFFFFFFFFFFF
+  and t0, t0, t1
+
+  # Check interrupt type
+  li t1, 7  # M-mode timer interrupt
+  beq t0, t1, handle_timer_interrupt
+
+  li t1, 3  # M-mode software interrupt (IPI)
+  beq t0, t1, handle_software_interrupt
+
+  # Unknown interrupt - just return
+  mret
+
+handle_timer_interrupt:
+  # Save all registers that might be clobbered by the Rust handler
+  addi sp, sp, -256
+  sd ra, 0(sp)
+  sd t0, 8(sp)
+  sd t1, 16(sp)
+  sd t2, 24(sp)
+  sd a0, 32(sp)
+  sd a1, 40(sp)
+  sd a2, 48(sp)
+  sd a3, 56(sp)
+  sd a4, 64(sp)
+  sd a5, 72(sp)
+  sd a6, 80(sp)
+  sd a7, 88(sp)
+  sd t3, 96(sp)
+  sd t4, 104(sp)
+  sd t5, 112(sp)
+  sd t6, 120(sp)
+
+  # Disable timer interrupt temporarily by setting mtimecmp to max
+  # MTIMECMP base is 0x02004000, each hart has 8-byte register
+  csrr t0, mhartid
+  slli t0, t0, 3  # Multiply by 8
+  li t1, 0x02004000
+  add t0, t0, t1
+  li t1, -1
+  sd t1, 0(t0)
+
+  # Call the Rust timer handler
+  call riscv_handle_timer
+
+  # Restore registers
+  ld ra, 0(sp)
+  ld t0, 8(sp)
+  ld t1, 16(sp)
+  ld t2, 24(sp)
+  ld a0, 32(sp)
+  ld a1, 40(sp)
+  ld a2, 48(sp)
+  ld a3, 56(sp)
+  ld a4, 64(sp)
+  ld a5, 72(sp)
+  ld a6, 80(sp)
+  ld a7, 88(sp)
+  ld t3, 96(sp)
+  ld t4, 104(sp)
+  ld t5, 112(sp)
+  ld t6, 120(sp)
+  addi sp, sp, 256
+
+  # Return from interrupt
+  mret
+
+handle_software_interrupt:
+  # Save all registers that might be clobbered by the Rust handler
+  addi sp, sp, -256
+  sd ra, 0(sp)
+  sd t0, 8(sp)
+  sd t1, 16(sp)
+  sd t2, 24(sp)
+  sd a0, 32(sp)
+  sd a1, 40(sp)
+  sd a2, 48(sp)
+  sd a3, 56(sp)
+  sd a4, 64(sp)
+  sd a5, 72(sp)
+  sd a6, 80(sp)
+  sd a7, 88(sp)
+  sd t3, 96(sp)
+  sd t4, 104(sp)
+  sd t5, 112(sp)
+  sd t6, 120(sp)
+
+  # Clear the software interrupt by writing 0 to MSIP
+  # MSIP base is 0x02000000, each hart has 4-byte register
+  csrr t0, mhartid
+  slli t0, t0, 2  # Multiply by 4
+  li t1, 0x02000000
+  add t0, t0, t1
+  sw zero, 0(t0)   # Clear MSIP
+
+  # Call the Rust IPI handler
+  call riscv_handle_ipi
+
+  # Restore registers
+  ld ra, 0(sp)
+  ld t0, 8(sp)
+  ld t1, 16(sp)
+  ld t2, 24(sp)
+  ld a0, 32(sp)
+  ld a1, 40(sp)
+  ld a2, 48(sp)
+  ld a3, 56(sp)
+  ld a4, 64(sp)
+  ld a5, 72(sp)
+  ld a6, 80(sp)
+  ld a7, 88(sp)
+  ld t3, 96(sp)
+  ld t4, 104(sp)
+  ld t5, 112(sp)
+  ld t6, 120(sp)
+  addi sp, sp, 256
+
+  # Return from interrupt
+  mret
+
+unhandled_trap:
+  # Write 'TRAP!' to UART
+  li t0, 0x10000000
+  li t1, 'T'
+  sb t1, 0(t0)
+  li t1, 'R'
+  sb t1, 0(t0)
+  li t1, 'A'
+  sb t1, 0(t0)
+  li t1, 'P'
+  sb t1, 0(t0)
+  li t1, '!'
+  sb t1, 0(t0)
+  li t1, '\r'
+  sb t1, 0(t0)
+  li t1, '\n'
+  sb t1, 0(t0)
+
+  # Print mcause
+  csrr a0, mcause
+  call print_hex
+
+  # Print mepc
+  csrr a0, mepc
+  call print_hex
+
+  # Print mtval
+  csrr a0, mtval
+  call print_hex
+
+  # Infinite loop
+1:
+  j 1b
+
+# Helper function to print a hex value
+print_hex:
+  li t0, 0x10000000
+  li t2, 60  # shift amount: 60, 56, 52, ..., 0
+  li t3, 16  # counter
+1:
+  srl t1, a0, t2
+  andi t1, t1, 0xF
+  li t4, 10
+  blt t1, t4, 2f
+  addi t1, t1, 'A'-10
+  j 3f
+2:
+  addi t1, t1, '0'
+3:
+  sb t1, 0(t0)
+  addi t2, t2, -4
+  addi t3, t3, -1
+  bnez t3, 1b
+
+  # Print newline
+  li t1, '\r'
+  sb t1, 0(t0)
+  li t1, '\n'
+  sb t1, 0(t0)
+  ret
+
+  .section .data
+  .align 3
+  .global dtb_ptr
+dtb_ptr:
+  .dword 0
diff --git a/kernel/src/arch/rv64/config.rs b/kernel/src/arch/rv64/config.rs
index a6333deee..bc9ffe72c 100644
--- a/kernel/src/arch/rv64/config.rs
+++ b/kernel/src/arch/rv64/config.rs
@@ -1,4 +1,2 @@
-pub const HEAP_START: usize = 0x0008_8000_0000;
-
 pub const PREEMPT_IRQ: u16 = 0;
 pub const WAKEUP_IRQ: u16 = 1;
diff --git a/kernel/src/arch/rv64/interrupt_controller.rs b/kernel/src/arch/rv64/interrupt_controller.rs
new file mode 100644
index 000000000..1d7112d2d
--- /dev/null
+++ b/kernel/src/arch/rv64/interrupt_controller.rs
@@ -0,0 +1,197 @@
+use alloc::boxed::Box;
+use awkernel_lib::interrupt::InterruptController;
+use core::ptr::{read_volatile, write_volatile};
+use core::sync::atomic::Ordering;
+
+/// RISC-V PLIC (Platform-Level Interrupt Controller) implementation
+/// Combined with CLINT/ACLINT for IPI support
+pub struct RiscvPlic {
+    base_address: usize,
+    max_priority: u32,
+    num_sources: u16,
+}
+
+// CLINT/ACLINT base address for IPIs
+const ACLINT_BASE: usize = 0x0200_0000;
+const MSIP_OFFSET: usize = 0x0000; // Machine Software Interrupt Pending
+
+// Import the detected CPU count from kernel_main
+use super::kernel_main::NUM_CPUS;
+
+impl RiscvPlic {
+    /// Create a new RISC-V PLIC controller
+    pub const fn new(base_address: usize, num_sources: u16) -> Self {
+        Self {
+            base_address,
+            max_priority: 7, // Typical PLIC priority levels: 0-7
+            num_sources,
+        }
+    }
+
+    /// Get the priority register address for a given interrupt source
+    fn priority_reg(&self, source: u16) -> *mut u32 {
+        (self.base_address + (source as usize * 4)) as *mut u32
+    }
+
+    /// Get the enable register address for a given context and interrupt source
+    fn enable_reg(&self, context: usize, source: u16) -> *mut u32 {
+        let reg_index = source as usize / 32;
+        let base = self.base_address + 0x2000 + context * 0x80;
+        (base + reg_index * 4) as *mut u32
+    }
+
+    /// Get the threshold register address for a given context
+    fn threshold_reg(&self, context: usize) -> *mut u32 {
+        (self.base_address + 0x200000 + context * 0x1000) as *mut u32
+    }
+
+    /// Get the claim register address for a given context
+    fn claim_reg(&self, context: usize) -> *mut u32 {
+        (self.base_address + 0x200004 + context * 0x1000) as *mut u32
+    }
+
+    /// Set priority for an interrupt source
+    fn set_priority(&self, source: u16, priority: u32) {
+        if source > 0 && source <= self.num_sources && priority <= self.max_priority {
+            let reg = self.priority_reg(source);
+            unsafe { write_volatile(reg, priority) };
+        }
+    }
+
+    /// Enable interrupt for a specific context
+    fn enable_interrupt(&self, context: usize, source: u16) {
+        if source > 0 && source <= self.num_sources {
+            let reg = self.enable_reg(context, source);
+            let bit_pos = source as usize % 32;
+            unsafe {
+                let current = read_volatile(reg);
+                write_volatile(reg, current | (1 << bit_pos));
+            }
+        }
+    }
+
+    /// Disable interrupt for a specific context
+    fn disable_interrupt(&self, context: usize, source: u16) {
+        if source > 0 && source <= self.num_sources {
+            let reg = self.enable_reg(context, source);
+            let bit_pos = source as usize % 32;
+            unsafe {
+                let current = read_volatile(reg);
+                write_volatile(reg, current & !(1 << bit_pos));
+            }
+        }
+    }
+
+    /// Get the current hart ID (CPU ID)
+    fn get_hart_id(&self) -> usize {
+        // Use mhartid CSR to get hart ID
+        let hart_id: usize;
+        unsafe {
+            core::arch::asm!("csrr {}, mhartid", out(reg) hart_id);
+        }
+        hart_id
+    }
+
+    /// Get supervisor mode context for current hart
+    fn get_supervisor_context(&self) -> usize {
+        // Typically supervisor mode context = hartid * 2 + 1
+        self.get_hart_id() * 2 + 1
+    }
+
+    /// Send software interrupt (IPI) to a specific hart
+    fn send_software_interrupt(&self, hart_id: u32) {
+        // MSIP register for each hart is at ACLINT_BASE + MSIP_OFFSET + (hart_id * 4)
+        let msip_addr = (ACLINT_BASE + MSIP_OFFSET + (hart_id as usize * 4)) as *mut u32;
+        unsafe {
+            write_volatile(msip_addr, 1);
+        }
+    }
+
+    /// Enable machine software interrupts
+    fn enable_software_interrupts(&self) {
+        unsafe {
+            // Set MIE.MSIE (Machine Software Interrupt Enable) bit (bit 3)
+            core::arch::asm!("csrrs t0, mie, {}", in(reg) 1 << 3);
+        }
+    }
+}
+
+impl InterruptController for RiscvPlic {
+    fn enable_irq(&mut self, irq: u16) {
+        // Set a reasonable priority for the interrupt
+        self.set_priority(irq, 1);
+
+        // Enable for supervisor mode (we're running in supervisor mode)
+        let context = self.get_supervisor_context();
+        self.enable_interrupt(context, irq);
+    }
+
+    fn disable_irq(&mut self, irq: u16) {
+        let context = self.get_supervisor_context();
+        self.disable_interrupt(context, irq);
+    }
+
+    fn pending_irqs(&self) -> Box<dyn Iterator<Item = u16>> {
+        // Check pending interrupts by claiming them
+        let context = self.get_supervisor_context();
+        let claim_reg = self.claim_reg(context);
+
+        let mut pending = alloc::vec::Vec::new();
+
+        unsafe {
+            let claimed = read_volatile(claim_reg);
+            if claimed != 0 {
+                pending.push(claimed as u16);
+                // Complete the interrupt (write back the claim)
+                write_volatile(claim_reg, claimed);
+            }
+        }
+
+        Box::new(pending.into_iter())
+    }
+
+    fn send_ipi(&mut self, _irq: u16, cpu_id: u32) {
+        // Send machine software interrupt to the target hart
+        self.send_software_interrupt(cpu_id);
+    }
+
+    fn send_ipi_broadcast(&mut self, _irq: u16) {
+        // Send IPI to all CPUs
+        let num_cpus = NUM_CPUS.load(Ordering::Acquire) as u32;
+        for cpu_id in 0..num_cpus {
+            self.send_software_interrupt(cpu_id);
+        }
+    }
+
+    fn send_ipi_broadcast_without_self(&mut self, _irq: u16) {
+        // Send IPI to all CPUs except current
+        let current_hart = self.get_hart_id() as u32;
+        let num_cpus = NUM_CPUS.load(Ordering::Acquire) as u32;
+        for cpu_id in 0..num_cpus {
+            if cpu_id != current_hart {
+                self.send_software_interrupt(cpu_id);
+            }
+        }
+    }
+
+    fn init_non_primary(&mut self) {
+        // Set threshold to 0 to accept all interrupts
+        let context = self.get_supervisor_context();
+        let threshold_reg = self.threshold_reg(context);
+        unsafe { write_volatile(threshold_reg, 0) };
+
+        // Enable software interrupts for IPIs
+        self.enable_software_interrupts();
+    }
+
+    fn irq_range(&self) -> (u16, u16) {
+        // PLIC interrupt sources typically range from 1 to num_sources
+        (1, self.num_sources + 1)
+    }
+
+    fn irq_range_for_pnp(&self) -> (u16, u16) {
+        // Reserve lower IRQs for system use, higher for PnP
+        let start = self.num_sources / 2;
+        (start, self.num_sources + 1)
+    }
+}
diff --git a/kernel/src/arch/rv64/kernel_main.rs b/kernel/src/arch/rv64/kernel_main.rs
index 03955b43f..d758190e9 100644
--- a/kernel/src/arch/rv64/kernel_main.rs
+++ b/kernel/src/arch/rv64/kernel_main.rs
@@ -1,76 +1,116 @@
 use super::console;
-use crate::{
-    config::{BACKUP_HEAP_SIZE, HEAP_START},
-    kernel_info::KernelInfo,
-};
+use crate::{config::BACKUP_HEAP_SIZE, kernel_info::KernelInfo};
 use awkernel_lib::{cpu, heap};
 use core::{
     arch::global_asm,
-    fmt::Write,
-    //    mem::MaybeUninit,
     sync::atomic::{AtomicBool, Ordering},
 };
-use ns16550a::Uart;
 
 const UART_BASE: usize = 0x1000_0000;
 
-const HEAP_SIZE: usize = 1024 * 1024 * 512;
-
-// TODO: set initial stack 4MB for each CPU on 0x8040_0000. see boot.S
-// const MAX_HARTS: usize = 8;
-// const INITIAL_STACK: usize = 0x8040_0000;
-// const INITIAL_STACK_SIZE: usize = 0x0040_0000;
-// #[repr(align(4096))]
-// struct InitialStack([MaybeUninit<u8>; INITIAL_STACK_SIZE * MAX_HARTS]);
-// #[no_mangle]
-// static INITIAL_STACK: InitialStack = unsafe { MaybeUninit::uninit().assume_init() };
+extern "C" {
+    static dtb_ptr: usize;
+}
 
 static PRIMARY_INITIALIZED: AtomicBool = AtomicBool::new(false);
+pub(super) static NUM_CPUS: core::sync::atomic::AtomicUsize =
+    core::sync::atomic::AtomicUsize::new(4);
 
 global_asm!(include_str!("boot.S"));
 
+/// M-mode software interrupt (IPI) handler called from assembly
+///
+/// # Safety
+///
+/// This function is called from the M-mode trap handler in boot.S
 #[no_mangle]
-pub unsafe extern "C" fn kernel_main() {
-    unsafe { crate::config::init() };
+pub unsafe extern "C" fn riscv_handle_ipi() {
+    // Handle all pending interrupts, including IPI preemption
+    awkernel_lib::interrupt::handle_irqs(true);
+}
 
-    let hartid: usize = cpu::cpu_id();
-    if hartid == 0 {
-        primary_hart(hartid);
-    } else {
-        non_primary_hart(hartid);
-    }
+/// M-mode timer interrupt handler called from assembly
+///
+/// # Safety
+///
+/// This function is called from the M-mode trap handler in boot.S
+#[no_mangle]
+pub unsafe extern "C" fn riscv_handle_timer() {
+    // Timer interrupt is already disabled in assembly by setting mtimecmp to max
+    // Handle any pending interrupts (the timer handler will be invoked if registered)
+    awkernel_lib::interrupt::handle_irqs(true);
 }
 
-unsafe fn primary_hart(hartid: usize) {
-    // setup interrupt; TODO;
+/// Write to UART during early boot for debugging
+///
+/// # Safety
+///
+/// UART must be initialized before calling this function
+#[inline]
+unsafe fn uart_debug_puts(s: &str) {
+    let uart_base = UART_BASE as *mut u8;
+    const LSR: usize = 5;
+    const THR: usize = 0;
 
-    super::console::init_port(UART_BASE);
+    for byte in s.bytes() {
+        while uart_base.add(LSR).read_volatile() & 0x20 == 0 {
+            core::hint::spin_loop();
+        }
+        uart_base.add(THR).write_volatile(byte);
+    }
+}
 
-    // Initialize memory management (page allocator)
-    awkernel_lib::arch::rv64::init_page_allocator();
+fn register_boot_dtb_pointer() {
+    unsafe {
+        let addr = dtb_ptr;
+        if addr != 0 {
+            awkernel_lib::arch::rv64::set_boot_dtb_ptr(addr);
+        }
+    }
+}
 
-    // Initialize virtual memory system
-    awkernel_lib::arch::rv64::init_kernel_space();
+/// Initialize UART for early debugging and console output.
+///
+/// # Safety
+///
+/// Must be called during early boot on the primary hart.
+unsafe fn init_uart() {
+    let uart_base = UART_BASE as *mut u8;
 
-    // Activate virtual memory (enable MMU and page tables)
-    awkernel_lib::arch::rv64::activate_kernel_space();
+    // 16550 UART register offsets
+    const IER: usize = 1;
+    const FCR: usize = 2;
+    const LCR: usize = 3;
+    const MCR: usize = 4;
+    const LSR: usize = 5;
+    const THR: usize = 0;
 
-    // Verify VM system is working by getting kernel token
-    let _kernel_token = awkernel_lib::arch::rv64::get_kernel_token();
+    // Initialize 16550 UART for early debugging
+    uart_base.add(IER).write_volatile(0x00); // Disable interrupts
+    uart_base.add(LCR).write_volatile(0x80); // Enable DLAB
+    uart_base.add(THR).write_volatile(0x03); // Baud rate divisor low
+    uart_base.add(IER).write_volatile(0x00); // Baud rate divisor high
+    uart_base.add(LCR).write_volatile(0x03); // 8N1
+    uart_base.add(FCR).write_volatile(0xC7); // Enable FIFO
+    uart_base.add(MCR).write_volatile(0x0B); // RTS/DTR
 
-    // setup the VM
-    let backup_start = HEAP_START;
-    let backup_size = BACKUP_HEAP_SIZE;
-    let primary_start = HEAP_START + BACKUP_HEAP_SIZE;
-    let primary_size = HEAP_SIZE;
+    // Helper to write strings during early boot
+    let uart_puts = |s: &str| {
+        for byte in s.bytes() {
+            while uart_base.add(LSR).read_volatile() & 0x20 == 0 {
+                core::hint::spin_loop();
+            }
+            uart_base.add(THR).write_volatile(byte);
+        }
+    };
+
+    uart_puts("\r\n=== AWkernel RV64 Starting ===\r\n");
 
-    // enable heap allocator
-    heap::init_primary(primary_start, primary_size);
-    heap::init_backup(backup_start, backup_size);
-    heap::TALLOC.use_primary_then_backup(); // use backup allocator
+    super::console::init_port(UART_BASE);
+    uart_puts("Console port initialized\r\n");
 
-    // initialize serial device and dump booting logo
-    let mut port = Uart::new(UART_BASE);
+    // Initialize full UART driver
+    let mut port = ns16550a::Uart::new(UART_BASE);
     port.init(
         ns16550a::WordLength::EIGHT,
         ns16550a::StopBits::ONE,
@@ -82,39 +122,196 @@ unsafe fn primary_hart(hartid: usize) {
         ns16550a::Divisor::BAUD115200,
     );
 
-    let _ = port.write_str("\r\nAwkernel is booting\r\n\r\n");
+    use core::fmt::Write;
+    let _ = port.write_str("UART driver initialized\r\n");
+}
 
-    // initialize console driver to which log messages are dumped
+/// Initialize memory management including page allocator and virtual memory.
+///
+/// # Safety
+///
+/// Must be called after heap initialization on the primary hart.
+unsafe fn init_memory_management() {
+    uart_debug_puts("Initializing page allocator...\r\n");
+    awkernel_lib::arch::rv64::init_page_allocator();
+
+    uart_debug_puts("Initializing kernel space...\r\n");
+    awkernel_lib::arch::rv64::init_kernel_space();
+
+    uart_debug_puts("Activating kernel space...\r\n");
+    awkernel_lib::arch::rv64::activate_kernel_space();
+
+    uart_debug_puts("Virtual memory system initialized\r\n");
+}
+
+/// Setup dynamic heap allocation based on available memory.
+///
+/// # Safety
+///
+/// Must be called before memory management initialization on the primary hart.
+unsafe fn init_heap_allocation() {
+    uart_debug_puts("Setting up heap allocation...\r\n");
+
+    extern "C" {
+        fn ekernel();
+    }
+
+    let heap_start = (ekernel as *const () as usize + 0xfff) & !0xfff; // Align to 4K
+    let backup_size = BACKUP_HEAP_SIZE;
+    let total_heap_size = awkernel_lib::arch::rv64::get_heap_size();
+
+    if total_heap_size <= backup_size {
+        // Use minimal heap if not enough memory
+        uart_debug_puts("Using minimal heap configuration\r\n");
+        let primary_size = 64 * 1024 * 1024; // 64MB minimum
+        heap::init_primary(heap_start + backup_size, primary_size);
+        heap::init_backup(heap_start, backup_size);
+    } else {
+        // Use dynamic calculation
+        uart_debug_puts("Using dynamic heap configuration\r\n");
+        let primary_size = total_heap_size - backup_size;
+        heap::init_primary(heap_start + backup_size, primary_size);
+        heap::init_backup(heap_start, backup_size);
+    }
+
+    heap::TALLOC.use_primary_then_backup();
+    uart_debug_puts("Heap allocation complete\r\n");
+}
+
+/// Initialize timer and interrupt controller for RV64.
+///
+/// # Safety
+///
+/// Must be called after heap and memory management initialization on the primary hart.
+unsafe fn init_timer_and_interrupts() {
+    use super::interrupt_controller::RiscvPlic;
+    use super::timer::RiscvTimer;
+    use alloc::boxed::Box;
+
+    uart_debug_puts("Initializing RISC-V interrupt controller...\r\n");
+
+    // RISC-V PLIC (Platform-Level Interrupt Controller) base address
+    // This should match the device tree or platform specification
+    const PLIC_BASE: usize = 0x0c000000;
+    const NUM_SOURCES: u16 = 128; // Typical PLIC configuration
+    const TIMER_IRQ: u16 = 5; // Machine timer interrupt is typically IRQ 5 for PLIC
+
+    // Initialize and register interrupt controller
+    let plic = Box::new(RiscvPlic::new(PLIC_BASE, NUM_SOURCES));
+    awkernel_lib::interrupt::register_interrupt_controller(plic);
+
+    uart_debug_puts("Initializing RISC-V timer...\r\n");
+
+    // Initialize and register timer
+    let timer = Box::new(RiscvTimer::new(TIMER_IRQ));
+    awkernel_lib::timer::register_timer(timer);
+
+    uart_debug_puts("Timer and interrupt controller initialized\r\n");
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn kernel_main() {
+    unsafe { crate::config::init() };
+
+    register_boot_dtb_pointer();
+
+    let hartid: usize = cpu::cpu_id();
+    if hartid == 0 {
+        primary_hart(hartid);
+    } else {
+        non_primary_hart(hartid);
+    }
+}
+
+/// Primary CPU initialization sequence.
+/// 1. Initialize UART for early debugging
+/// 2. Setup heap allocation FIRST (required for page tables)
+/// 3. Initialize memory management (page allocator and virtual memory)
+/// 4. Initialize architecture-specific features
+/// 5. Initialize console driver
+/// 6. Initialize timer and interrupt controller
+/// 7. Detect CPU count from device tree
+/// 8. Wake up secondary CPUs
+/// 9. Start the kernel main function
+///
+/// # Safety
+///
+/// Must be called exactly once by the primary hart during boot.
+unsafe fn primary_hart(hartid: usize) {
+    // 1. Initialize UART for early debugging
+    init_uart();
+
+    // 2. Setup heap allocation FIRST - page tables need this!
+    init_heap_allocation();
+
+    // 3. Initialize memory management (now that heap exists)
+    init_memory_management();
+
+    // 4. Initialize architecture-specific features
+    awkernel_lib::arch::rv64::init_primary();
+
+    // 5. Initialize console driver
     console::init(UART_BASE);
 
-    // switch to S-Mode; TODO;
-    // * currntly this impl. holds both kernel and userland
-    // * in M-Mode, which is the highest priority.
+    // 6. Initialize timer and interrupt controller
+    init_timer_and_interrupts();
+
+    // Enable software interrupts for IPIs on primary hart
+    unsafe {
+        core::arch::asm!("csrrs t0, mie, {}", in(reg) 1 << 3);
+    }
 
-    // wake up another harts
+    log::info!("AWkernel RV64 primary CPU initialization complete");
+
+    // 7. Detect CPU count from device tree and store it
+    let num_cpu = awkernel_lib::arch::rv64::detect_cpu_count();
+    NUM_CPUS.store(num_cpu, Ordering::Release);
+
+    // 8. Wake up secondary CPUs
     PRIMARY_INITIALIZED.store(true, Ordering::SeqCst);
 
     let kernel_info = KernelInfo {
         info: (),
         cpu_id: hartid,
-        num_cpu: 4, // TODO: get the number of CPUs
+        num_cpu,
     };
 
+    // 9. Start the kernel main function
     crate::main::<()>(kernel_info);
 }
 
+/// Secondary CPU initialization sequence.
+/// 1. Wait for primary CPU to complete initialization
+/// 2. Initialize architecture-specific features for non-primary CPUs
+/// 3. Setup heap allocator usage
+/// 4. Get CPU count from primary hart
+/// 5. Start the kernel main function
+///
+/// # Safety
+///
+/// Must be called by non-primary harts during boot.
 unsafe fn non_primary_hart(hartid: usize) {
+    // 1. Wait for primary CPU to complete initialization
     while !PRIMARY_INITIALIZED.load(Ordering::SeqCst) {
         core::hint::spin_loop();
     }
 
-    heap::TALLOC.use_primary_then_backup(); // use backup allocator
+    // 2. Initialize architecture-specific features for non-primary CPUs
+    awkernel_lib::arch::rv64::init_non_primary();
+    awkernel_lib::interrupt::init_non_primary();
+
+    // 3. Setup heap allocator usage
+    heap::TALLOC.use_primary_then_backup();
+
+    // 4. Get CPU count detected by primary hart
+    let num_cpu = NUM_CPUS.load(Ordering::Acquire);
 
     let kernel_info = KernelInfo {
         info: (),
         cpu_id: hartid,
-        num_cpu: 4, // TODO: get the number of CPUs
+        num_cpu,
     };
 
+    // 5. Start the kernel main function
     crate::main::<()>(kernel_info);
 }
diff --git a/kernel/src/arch/rv64/timer.rs b/kernel/src/arch/rv64/timer.rs
new file mode 100644
index 000000000..631215e09
--- /dev/null
+++ b/kernel/src/arch/rv64/timer.rs
@@ -0,0 +1,85 @@
+use core::ptr::{read_volatile, write_volatile};
+use core::time::Duration;
+
+/// RISC-V Timer Implementation
+/// Uses MTIME (machine time) and MTIMECMP (machine time compare) registers
+pub struct RiscvTimer {
+    irq: u16,
+}
+
+// RISC-V timer memory-mapped register addresses
+// These should match the device tree or platform configuration
+const ACLINT_MTIME_BASE: usize = 0x0200_0000 + 0x0000bff8;
+const ACLINT_MTIMECMP_BASE: usize = 0x0200_0000 + 0x00004000;
+const RISCV_TIMEBASE_FREQ: u64 = 10_000_000; // 10 MHz
+
+impl RiscvTimer {
+    pub const fn new(irq: u16) -> Self {
+        RiscvTimer { irq }
+    }
+
+    /// Get the current machine time
+    fn get_mtime() -> u64 {
+        let mtime = ACLINT_MTIME_BASE as *const u64;
+        unsafe { read_volatile(mtime) }
+    }
+
+    /// Get the current hart ID
+    fn get_hart_id() -> usize {
+        let hart_id: usize;
+        unsafe {
+            core::arch::asm!("csrr {}, mhartid", out(reg) hart_id);
+        }
+        hart_id
+    }
+
+    /// Set the machine time compare value
+    fn set_mtimecmp(&self, value: u64) {
+        // Each hart has its own MTIMECMP register at ACLINT_MTIMECMP_BASE + (hartid * 8)
+        let hart_id = Self::get_hart_id();
+        let mtimecmp = (ACLINT_MTIMECMP_BASE + (hart_id * 8)) as *mut u64;
+        unsafe { write_volatile(mtimecmp, value) };
+    }
+
+    /// Enable machine timer interrupt
+    fn enable_mti(&self) {
+        unsafe {
+            // Set MIE.MTIE (Machine Timer Interrupt Enable) bit
+            core::arch::asm!("csrrs t0, mie, {}", in(reg) 1 << 7);
+        }
+    }
+
+    /// Disable machine timer interrupt
+    fn disable_mti(&self) {
+        unsafe {
+            // Clear MIE.MTIE (Machine Timer Interrupt Enable) bit
+            core::arch::asm!("csrrc t0, mie, {}", in(reg) 1 << 7);
+        }
+    }
+}
+
+impl awkernel_lib::timer::Timer for RiscvTimer {
+    fn reset(&self, dur: Duration) {
+        let current_time = Self::get_mtime();
+        let ticks = (dur.as_nanos() as u64 * RISCV_TIMEBASE_FREQ) / 1_000_000_000;
+        let target_time = current_time.saturating_add(ticks);
+
+        // Set the compare value
+        self.set_mtimecmp(target_time);
+
+        // Enable machine timer interrupt
+        self.enable_mti();
+    }
+
+    fn irq_id(&self) -> u16 {
+        self.irq
+    }
+
+    fn disable(&self) {
+        // Disable machine timer interrupt
+        self.disable_mti();
+
+        // Set MTIMECMP to maximum value to prevent spurious interrupts
+        self.set_mtimecmp(u64::MAX);
+    }
+}