
Commit 98e4a92

mazong1123 and Copilot authored
[Windows] Search for closest free page in x64 JIT memory allocation (#124)
The old x86_64 Windows implementation of allocate_jit_memory scanned linearly from func_addr - 2GB upward, which often allocated memory ~2GB away from the function. This could land in or near the stack region, disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd) during parallel test execution.

This change replaces the linear scan with a bidirectional closest-first search (matching the existing aarch64 Windows and Unix implementations). The allocator now searches outward from the function address at +offset and -offset, finding the closest available page first.

This is an improved version of PR #122 that fixes two additional issues:

- Searches both directions (not just downward) for robustness
- Avoids an infinite loop when checked_sub fails by keeping the offset increment outside the inner direction loop

Also adds tests:

- Unit test verifying JIT allocation is close to the source function (<128MB, not ~2GB)
- Unit test verifying JIT allocation is not near the stack region
- Integration test for stack growth after patching
- Integration test for concurrent patching with deep stack usage (8 threads)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6f4b5aa commit 98e4a92

2 files changed

Lines changed: 262 additions & 23 deletions
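The key idea in the fix is the probe order. Below is a minimal, self-contained sketch of the closest-first search described in the commit message; closest_first_probe and is_free are hypothetical names used only for illustration, with is_free standing in for the real VirtualAlloc-based probing in the diff below.

// Sketch only: visit offsets 0, +page, -page, +2*page, -2*page, ... so the
// first hit is the free page closest to `origin`. Names are illustrative,
// not part of the crate's API.
fn closest_first_probe(
    origin: u64,
    page: u64,
    max_range: u64,
    is_free: impl Fn(u64) -> bool,
) -> Option<u64> {
    let mut offset: u64 = 0;
    while offset <= max_range {
        for &dir in &[1i64, -1i64] {
            let hint = if dir > 0 {
                origin.checked_add(offset)
            } else if offset > 0 {
                origin.checked_sub(offset)
            } else {
                continue; // offset 0 was already probed in the +1 direction
            };
            if let Some(addr) = hint {
                if is_free(addr) {
                    return Some(addr);
                }
            }
        }
        // Incrementing outside the direction loop guarantees forward progress
        // even when checked_sub returns None near the bottom of the address space.
        offset += page;
    }
    None
}

fn main() {
    // Toy scenario: only the page three pages above `origin` is free.
    let origin = 0x1000_0000u64;
    let page = 0x1000u64;
    let free_page = origin + 3 * page;
    let found = closest_first_probe(origin, page, 0x8000_0000, |addr| addr == free_page);
    assert_eq!(found, Some(free_page));
}

The first probe that succeeds is, by construction, the free page nearest to the function, which is what keeps the JIT trampoline away from distant regions such as the thread stack.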


src/injector_core/common.rs

Lines changed: 141 additions & 23 deletions
@@ -213,33 +213,47 @@ fn allocate_jit_memory_windows(_src: &FuncPtrInternal, code_size: usize) -> *mut
 
     #[cfg(target_arch = "x86_64")]
     {
-        let max_range: usize = 0x8000_0000; // ±2GB
-        let original_addr = _src.as_ptr() as usize;
-        let page_size = unsafe { get_page_size() };
-        let mut addr = original_addr.saturating_sub(max_range);
-
-        while addr <= original_addr + max_range {
-            let ptr = unsafe {
-                VirtualAlloc(
-                    addr as *mut c_void,
-                    code_size,
-                    MEM_COMMIT | MEM_RESERVE,
-                    PAGE_EXECUTE_READWRITE,
-                )
-            };
-
-            if !ptr.is_null() {
-                let allocated = ptr as usize;
-                if allocated.abs_diff(original_addr) <= max_range {
-                    return ptr as *mut u8;
+        let max_range: u64 = 0x8000_0000; // ±2GB
+        let original_addr = _src.as_ptr() as u64;
+        let page_size = unsafe { get_page_size() as u64 };
+
+        // Search outward from the function address to find the CLOSEST free page.
+        // This avoids allocating far from the function (e.g., in/near stack memory),
+        // which could disrupt the stack guard page and cause STATUS_STACK_OVERFLOW.
+        let mut offset: u64 = 0;
+        while offset <= max_range {
+            for &dir in &[1i64, -1i64] {
+                let hint = if dir > 0 {
+                    original_addr.checked_add(offset)
+                } else if offset > 0 {
+                    original_addr.checked_sub(offset)
                 } else {
-                    unsafe {
-                        VirtualFree(ptr, 0, MEM_RELEASE);
+                    continue; // Already tried offset=0 with dir=1
+                };
+
+                let Some(hint_addr) = hint else { continue };
+
+                let ptr = unsafe {
+                    VirtualAlloc(
+                        hint_addr as *mut c_void,
+                        code_size,
+                        MEM_COMMIT | MEM_RESERVE,
+                        PAGE_EXECUTE_READWRITE,
+                    )
+                };
+                if !ptr.is_null() {
+                    let allocated = ptr as u64;
+                    let diff = allocated.abs_diff(original_addr);
+                    if diff <= max_range {
+                        return ptr as *mut u8;
+                    } else {
+                        unsafe {
+                            VirtualFree(ptr, 0, MEM_RELEASE);
+                        }
                     }
                 }
             }
-
-            addr += page_size;
+            offset += page_size;
         }
 
         panic!("Failed to allocate executable memory within ±2GB of original function address on x86_64 Windows");
@@ -496,3 +510,107 @@ unsafe fn clear_cache(start: *mut u8, end: *mut u8) {
         core::arch::asm!("dsb sy", "isb", options(nostack, nomem));
     }
 }
+
+#[cfg(test)]
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+mod tests {
+    use super::*;
+
+    /// A dummy function used as the "source" address for JIT allocation tests.
+    #[inline(never)]
+    fn dummy_target_function() -> i32 {
+        std::hint::black_box(42)
+    }
+
+    /// Verify that `allocate_jit_memory` returns an address close to the source function,
+    /// not at the far end of the ±2GB range where it could collide with the stack.
+    ///
+    /// The old x86_64 Windows implementation scanned linearly from `func_addr - 2GB`,
+    /// which could return memory near the stack guard pages. The fixed implementation
+    /// searches outward from the function address, so the result should be much closer.
+    #[test]
+    fn test_jit_allocation_is_close_to_source() {
+        let func_ptr = unsafe {
+            FuncPtrInternal::new(
+                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
+            )
+        };
+        let func_addr = func_ptr.as_ptr() as u64;
+
+        let jit_ptr = allocate_jit_memory(&func_ptr, 256);
+        assert!(!jit_ptr.is_null(), "JIT allocation should succeed");
+
+        let jit_addr = jit_ptr as u64;
+        let distance = func_addr.abs_diff(jit_addr);
+
+        // The allocation should be relatively close — within 128MB.
+        // The old buggy code would often return addresses ~2GB away, near the stack.
+        let max_acceptable_distance: u64 = 128 * 1024 * 1024; // 128MB
+        assert!(
+            distance <= max_acceptable_distance,
+            "JIT memory should be allocated close to the function. \
+             Function at {func_addr:#x}, JIT at {jit_addr:#x}, distance: {distance} bytes ({} MB). \
+             Expected within {max_acceptable_distance} bytes ({} MB).",
+            distance / (1024 * 1024),
+            max_acceptable_distance / (1024 * 1024),
+        );
+
+        // Clean up
+        unsafe {
+            #[cfg(any(target_os = "linux", target_os = "macos"))]
+            {
+                libc::munmap(jit_ptr as *mut c_void, 256);
+            }
+            #[cfg(target_os = "windows")]
+            {
+                VirtualFree(jit_ptr as *mut c_void, 0, MEM_RELEASE);
+            }
+        }
+    }
+
+    /// Verify that JIT allocation does NOT land in the current thread's stack region.
+    /// This directly tests the root cause of the STATUS_STACK_OVERFLOW crash: the old
+    /// algorithm could allocate JIT memory in/near the stack, disrupting the guard page.
+    #[test]
+    fn test_jit_allocation_not_in_stack_region() {
+        let func_ptr = unsafe {
+            FuncPtrInternal::new(
+                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
+            )
+        };
+
+        // Use a stack local's address to approximate the stack location
+        let stack_local: u64 = 0;
+        let stack_addr = &stack_local as *const u64 as u64;
+
+        let jit_ptr = allocate_jit_memory(&func_ptr, 256);
+        assert!(!jit_ptr.is_null(), "JIT allocation should succeed");
+
+        let jit_addr = jit_ptr as u64;
+        // Stack on Windows x86_64 is typically 1-8MB. Use a conservative 16MB guard zone.
+        let stack_guard_zone: u64 = 16 * 1024 * 1024;
+        let distance_to_stack = jit_addr.abs_diff(stack_addr);
+
+        assert!(
+            distance_to_stack > stack_guard_zone,
+            "JIT memory should NOT be near the stack! \
+             JIT at {jit_addr:#x}, stack approx at {stack_addr:#x}, \
+             distance: {distance_to_stack} bytes ({} MB). \
+             Must be > {stack_guard_zone} bytes ({} MB) from stack.",
+            distance_to_stack / (1024 * 1024),
+            stack_guard_zone / (1024 * 1024),
+        );
+
+        // Clean up
+        unsafe {
+            #[cfg(any(target_os = "linux", target_os = "macos"))]
+            {
+                libc::munmap(jit_ptr as *mut c_void, 256);
+            }
+            #[cfg(target_os = "windows")]
+            {
+                VirtualFree(jit_ptr as *mut c_void, 0, MEM_RELEASE);
+            }
+        }
+    }
+}

tests/stack_safety.rs

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
+// Test that JIT memory allocation does not interfere with stack growth.
+//
+// The old x86_64 Windows implementation of `allocate_jit_memory` scanned linearly
+// from `func_addr - 2GB`, which could allocate memory in/near the stack region,
+// disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd)
+// during parallel test execution.
+//
+// These tests verify that after patching functions, deep stack usage still works.
+#![cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm"))]
+
+use std::sync::{Arc, Barrier};
+use std::thread;
+
+use injectorpp::interface::injector::*;
+
+#[inline(never)]
+fn get_value_stack_test() -> i32 {
+    std::hint::black_box(1)
+}
+
+#[inline(never)]
+fn get_other_value_stack_test() -> i32 {
+    std::hint::black_box(2)
+}
+
+#[inline(never)]
+fn is_enabled_stack_test() -> bool {
+    std::hint::black_box(false)
+}
+
+/// Consume stack space via recursion. Each frame is ~256 bytes due to the array.
+/// At depth 2000, this uses ~512KB of the thread's stack.
+#[inline(never)]
+fn consume_stack(depth: u32) -> u64 {
+    if depth == 0 {
+        return 1;
+    }
+    // Force the compiler to keep a sizable stack frame.
+    let buf = std::hint::black_box([0u8; 256]);
+    let result = consume_stack(depth - 1);
+    std::hint::black_box(buf[0] as u64) + result
+}
+
+/// After patching a function, verify that deep recursion still works.
+/// With the old buggy JIT allocator, the stack guard page could be disrupted,
+/// causing a stack overflow even at moderate recursion depths.
+#[test]
+fn test_stack_growth_works_after_patching() {
+    let mut injector = InjectorPP::new();
+    injector
+        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
+        .will_execute(injectorpp::fake!(
+            func_type: fn() -> i32,
+            returns: 42
+        ));
+
+    // Verify the fake works
+    assert_eq!(get_value_stack_test(), 42);
+
+    // Now do deep recursion — this should NOT cause STATUS_STACK_OVERFLOW.
+    // If JIT memory was allocated in the stack region (old bug), this would crash.
+    let result = consume_stack(2000);
+    assert!(result > 0, "Deep recursion should succeed after patching");
+}
+
+/// Multiple threads concurrently patch different functions and then exercise
+/// deep stack usage. This replicates the conditions of the original crash:
+/// parallel tests + injectorpp patching + significant stack consumption.
+#[test]
+fn test_concurrent_patching_with_deep_stack_usage() {
+    let thread_count = 8;
+    let barrier = Arc::new(Barrier::new(thread_count));
+    let mut handles = Vec::new();
+
+    for i in 0..thread_count {
+        let b = barrier.clone();
+        handles.push(thread::spawn(move || {
+            let mut injector = InjectorPP::new();
+
+            // Each thread patches a function
+            match i % 3 {
+                0 => {
+                    injector
+                        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
+                        .will_execute(injectorpp::fake!(
+                            func_type: fn() -> i32,
+                            returns: 100
+                        ));
+                }
+                1 => {
+                    injector
+                        .when_called(injectorpp::func!(fn(get_other_value_stack_test)() -> i32))
+                        .will_execute(injectorpp::fake!(
+                            func_type: fn() -> i32,
+                            returns: 200
+                        ));
+                }
+                _ => {
+                    injector
+                        .when_called(injectorpp::func!(fn(is_enabled_stack_test)() -> bool))
+                        .will_return_boolean(true);
+                }
+            };
+
+            // Synchronize: all threads have patched before anyone recurses
+            b.wait();
+
+            // Deep recursion — would crash if JIT memory disrupted the stack guard page
+            let result = consume_stack(1500);
+            assert!(
+                result > 0,
+                "Thread {i} should complete deep recursion without stack overflow"
+            );
+        }));
+    }
+
+    for (i, h) in handles.into_iter().enumerate() {
+        h.join()
+            .unwrap_or_else(|_| panic!("Thread {i} panicked — possible stack overflow"));
+    }
+}
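Assuming the crate follows the standard Cargo layout, the new integration tests can be run on their own with cargo test --test stack_safety, while the unit tests added to src/injector_core/common.rs run as part of cargo test --lib.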
