diff --git a/Cargo.lock b/Cargo.lock index 10cd42fccf814d..2858c0556ddba6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,4 +270,5 @@ dependencies = [ "insta", "jit", "rand", + "yjit", ] diff --git a/ext/io/console/console.c b/ext/io/console/console.c index 9c7b674690ce1c..5f3c9478f75296 100644 --- a/ext/io/console/console.c +++ b/ext/io/console/console.c @@ -2000,14 +2000,13 @@ InitVM_console(void) rb_define_method(rb_cIO, "ttyname", console_ttyname, 0); rb_define_singleton_method(rb_cIO, "console", console_dev, -1); { - /* :stopdoc: */ + /* :nodoc: */ VALUE mReadable = rb_define_module_under(rb_cIO, "generic_readable"); - /* :startdoc: */ rb_define_method(mReadable, "getch", io_getch, -1); rb_define_method(mReadable, "getpass", io_getpass, -1); } { - /* :stopdoc: */ + /* :nodoc: */ cConmode = rb_define_class_under(rb_cIO, "ConsoleMode", rb_cObject); rb_define_const(cConmode, "VERSION", rb_obj_freeze(rb_str_new_cstr(IO_CONSOLE_VERSION))); rb_define_alloc_func(cConmode, conmode_alloc); @@ -2016,6 +2015,5 @@ InitVM_console(void) rb_define_method(cConmode, "echo=", conmode_set_echo, 1); rb_define_method(cConmode, "raw!", conmode_set_raw, -1); rb_define_method(cConmode, "raw", conmode_raw_new, -1); - /* :startdoc: */ } } diff --git a/ext/psych/lib/psych/versions.rb b/ext/psych/lib/psych/versions.rb index 6b22379fe6afd5..4c7a80d5c84274 100644 --- a/ext/psych/lib/psych/versions.rb +++ b/ext/psych/lib/psych/versions.rb @@ -5,6 +5,6 @@ module Psych VERSION = '5.3.1' if RUBY_ENGINE == 'jruby' - DEFAULT_SNAKEYAML_VERSION = '3.0.1'.freeze + DEFAULT_SNAKEYAML_VERSION = '2.10'.freeze end end diff --git a/gc.c b/gc.c index 9a724709fc4db3..5001bc5c01cb2e 100644 --- a/gc.c +++ b/gc.c @@ -1009,6 +1009,29 @@ gc_validate_pc(VALUE obj) #endif } +NOINLINE(static void gc_newobj_hook(VALUE obj)); +static void +gc_newobj_hook(VALUE obj) +{ + int lev = RB_GC_VM_LOCK_NO_BARRIER(); + { + size_t slot_size = rb_gc_obj_slot_size(obj); + memset((char *)obj + sizeof(struct RBasic), 0, slot_size - sizeof(struct RBasic)); + + /* We must disable GC here because the callback could call xmalloc + * which could potentially trigger a GC, and a lot of code is unsafe + * to trigger a GC right after an object has been allocated because + * they perform initialization for the object and assume that the + * GC does not trigger before then. */ + bool gc_disabled = RTEST(rb_gc_disable_no_rest()); + { + rb_gc_event_hook(obj, RUBY_INTERNAL_EVENT_NEWOBJ); + } + if (!gc_disabled) rb_gc_enable(); + } + RB_GC_VM_UNLOCK_NO_BARRIER(lev); +} + static inline VALUE newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, shape_id_t shape_id, bool wb_protected, size_t size) { @@ -1018,23 +1041,7 @@ newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, shape_id_t shape_id, bool w gc_validate_pc(obj); if (UNLIKELY(rb_gc_event_hook_required_p(RUBY_INTERNAL_EVENT_NEWOBJ))) { - int lev = RB_GC_VM_LOCK_NO_BARRIER(); - { - size_t slot_size = rb_gc_obj_slot_size(obj); - memset((char *)obj + sizeof(struct RBasic), 0, slot_size - sizeof(struct RBasic)); - - /* We must disable GC here because the callback could call xmalloc - * which could potentially trigger a GC, and a lot of code is unsafe - * to trigger a GC right after an object has been allocated because - * they perform initialization for the object and assume that the - * GC does not trigger before then. */ - bool gc_disabled = RTEST(rb_gc_disable_no_rest()); - { - rb_gc_event_hook(obj, RUBY_INTERNAL_EVENT_NEWOBJ); - } - if (!gc_disabled) rb_gc_enable(); - } - RB_GC_VM_UNLOCK_NO_BARRIER(lev); + gc_newobj_hook(obj); } #if RGENGC_CHECK_MODE diff --git a/gems/bundled_gems b/gems/bundled_gems index 4164ff4d405904..4966b928f5e2d4 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -37,7 +37,7 @@ ostruct 0.6.3 https://github.com/ruby/ostruct pstore 0.2.1 https://github.com/ruby/pstore benchmark 0.5.0 https://github.com/ruby/benchmark logger 1.7.0 https://github.com/ruby/logger -rdoc 7.2.0 https://github.com/ruby/rdoc +rdoc 7.2.0 https://github.com/ruby/rdoc 911b122a587e24f05434dbeb2c3e39cea607e21f win32ole 1.9.3 https://github.com/ruby/win32ole irb 1.17.0 https://github.com/ruby/irb reline 0.6.3 https://github.com/ruby/reline diff --git a/lib/bundled_gems.rb b/lib/bundled_gems.rb index c5893a241efae7..b64991af0ca050 100644 --- a/lib/bundled_gems.rb +++ b/lib/bundled_gems.rb @@ -1,9 +1,9 @@ # -*- frozen-string-literal: true -*- -# :stopdoc: -module Gem +module Gem # :nodoc: + # TODO: the nodoc above is a workaround for RDoc's Prism parser handling stopdoc differently, we may want to use + # stopdoc/startdoc pair like before end -# :startdoc: module Gem::BUNDLED_GEMS # :nodoc: SINCE = { diff --git a/misc/jit_perf.py b/misc/jit_perf.py index 37a63f5329e17f..bc0f961b20bf07 100755 --- a/misc/jit_perf.py +++ b/misc/jit_perf.py @@ -22,8 +22,7 @@ def categorize_symbol(dso, symbol): return '[sha256]' elif symbol.startswith('[JIT] gen_send'): return '[JIT send]' - # TODO: Stop using zjit:: as the prefix for JIT code. Rust modules and JIT code should use different namespaces. - elif symbol.startswith('[JIT]') or (symbol.startswith('zjit::') and '@') or symbol == 'zjit::ZJIT entry trampoline': + elif symbol.startswith('[JIT]') or symbol.startswith('ZJIT: ') or dso.startswith('perf-'): return '[JIT code]' elif '::' in symbol or symbol.startswith('_ZN4yjit') or symbol.startswith('_ZN4zjit'): return '[JIT compile]' diff --git a/zjit/Cargo.toml b/zjit/Cargo.toml index 0ef8ff511005a8..1bfcc4aff06813 100644 --- a/zjit/Cargo.toml +++ b/zjit/Cargo.toml @@ -14,6 +14,7 @@ jit = { version = "0.1.0", path = "../jit" } [dev-dependencies] insta = "1.43.1" rand = "0.9" +yjit = { path = "../yjit" } # NOTE: Development builds select a set of these via configure.ac # For debugging, `make V=1` shows exact cargo invocation. diff --git a/zjit/build.rs b/zjit/build.rs index 4ee3d65b33062e..b357c6b547aa27 100644 --- a/zjit/build.rs +++ b/zjit/build.rs @@ -25,13 +25,5 @@ fn main() { println!("cargo:rustc-link-lib={lib_name}"); } } - - // When doing a combo build, there is a copy of ZJIT symbols in libruby.a - // and Cargo builds another copy for the test binary. Tell the linker to - // not complaint about duplicate symbols. For some reason, darwin doesn't - // suffer the same issue. - if env::var("TARGET").unwrap().contains("linux") { - println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition"); - } } } diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index f7a3a95ee4322b..fab16166e6262b 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1734,6 +1734,13 @@ mod tests { (asm, CodeBlock::new_dummy()) } + fn setup_asm_with_scratch_reg() -> (Assembler, CodeBlock, Opnd) { + crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id("test"); + (asm, CodeBlock::new_dummy(), scratch_reg) + } + #[test] fn test_lir_string() { use crate::hir::SideExitReason; @@ -2159,9 +2166,7 @@ mod tests { #[test] fn test_store_with_valid_scratch_reg() { - let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); - asm.new_block_without_id("test"); - let mut cb = CodeBlock::new_dummy(); + let (mut asm, mut cb, scratch_reg) = setup_asm_with_scratch_reg(); asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 4640bac15c6b02..03dc02c67846df 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -5,11 +5,11 @@ use std::panic; use std::rc::Rc; use std::sync::{Arc, Mutex}; use crate::bitset::BitSet; -use crate::codegen::local_size_and_idx_to_ep_offset; +use crate::codegen::{local_size_and_idx_to_ep_offset, perf_symbol_range_start, perf_symbol_range_end}; use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, vm_stack_canary}; use crate::hir::{Invariant, SideExitReason}; use crate::hir; -use crate::options::{TraceExits, get_option}; +use crate::options::{TraceExits, PerfMap, get_option}; use crate::cruby::VALUE; use crate::payload::IseqVersionRef; use crate::stats::{exit_counter_ptr, exit_counter_ptr_for_opcode, side_exit_counter, CompileError}; @@ -2675,6 +2675,13 @@ impl Assembler // Map from SideExit to compiled Label. This table is used to deduplicate side exit code. let mut compiled_exits: HashMap = HashMap::new(); + // Start a new perf range for side exits + let perf_symbol = if get_option!(perf) == Some(PerfMap::HIR) { + Some(perf_symbol_range_start(self, "side exit")) + } else { + None + }; + // Mark the start of side-exit code so we can measure its size if !targets.is_empty() { self.pos_marker(move |start_pos, cb| { @@ -2756,6 +2763,11 @@ impl Assembler crate::stats::incr_counter_by(crate::stats::Counter::compile_side_exit_time_ns, nanos as u64); } + // Close the current perf range for side exits + if let Some(perf_symbol) = &perf_symbol { + perf_symbol_range_end(self, perf_symbol); + } + // Extract exit instructions and restore the previous current block let exit_insns = take(&mut self.basic_blocks[exit_block.0].insns); self.set_current_block(saved_block); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index a5ca214de2c5c4..a1f7d3f65c714b 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -23,7 +23,7 @@ use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NAT use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason}; use crate::hir_type::{types, Type}; -use crate::options::get_option; +use crate::options::{get_option, PerfMap}; use crate::cast::IntoUsize; /// At the moment, we support recompiling each ISEQ only once. @@ -211,7 +211,7 @@ pub fn gen_iseq_call(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result<(), } /// Write an entry to the perf map in /tmp -fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) { +fn register_with_perf(symbol_name: String, start_ptr: usize, code_size: usize) { use std::io::Write; let perf_map = format!("/tmp/perf-{}.map", std::process::id()); let Ok(file) = std::fs::OpenOptions::new().create(true).append(true).open(&perf_map) else { @@ -219,8 +219,8 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) { return; }; let mut file = std::io::BufWriter::new(file); - let Ok(_) = writeln!(file, "{start_ptr:#x} {code_size:#x} zjit::{iseq_name}") else { - debug!("Failed to write {iseq_name} to perf map file: {perf_map}"); + let Ok(_) = writeln!(file, "{start_ptr:#x} {code_size:#x} ZJIT: {symbol_name}") else { + debug!("Failed to write {symbol_name} to perf map file: {perf_map}"); return; }; } @@ -244,11 +244,11 @@ pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result let (code_ptr, gc_offsets) = asm.compile(cb)?; assert!(gc_offsets.is_empty()); - if get_option!(perf) { + if get_option!(perf).is_some() { let start_ptr = code_ptr.raw_addr(cb); let end_ptr = cb.get_write_ptr().raw_addr(cb); let code_size = end_ptr - start_ptr; - register_with_perf("ZJIT entry trampoline".into(), start_ptr, code_size); + register_with_perf("entry trampoline".into(), start_ptr, code_size); } Ok(code_ptr) } @@ -448,7 +448,23 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func assert!(insn_idx == block.insns().len() - 1, "Jump must be the last instruction in HIR block"); }, _ => { - if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { + // Start a new perf range for the HIR instruction. For now, we do this only for + // non-terminator instructions because LIR blocks must end with a terminator instruction. + let perf_symbol = if get_option!(perf) == Some(PerfMap::HIR) && !insn.is_terminator() { + let insn_name = format!("{insn}").split_whitespace().next().unwrap().to_string(); + Some(perf_symbol_range_start(&mut asm, &insn_name)) + } else { + None + }; + + let result = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn); + + // Close the current perf range for the HIR instruction. + if let Some(perf_symbol) = &perf_symbol { + perf_symbol_range_end(&mut asm, perf_symbol); + } + + if let Err(last_snapshot) = result { debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn)); gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot)); @@ -472,7 +488,7 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func // Generate code if everything can be compiled let result = asm.compile(cb); if let Ok((start_ptr, _)) = result { - if get_option!(perf) { + if get_option!(perf) == Some(PerfMap::ISEQ) { let start_usize = start_ptr.raw_addr(cb); let end_usize = cb.get_write_ptr().raw_addr(cb); let code_size = end_usize - start_usize; @@ -1274,14 +1290,14 @@ fn gen_load_self() -> Opnd { fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, return_type: Type) -> Opnd { gen_incr_counter(asm, Counter::load_field_count); asm_comment!(asm, "Load field id={} offset={}", id.contents_lossy(), offset); - let recv = asm.load(recv); + let recv = asm.load_mem(recv); asm.load(Opnd::mem(return_type.num_bits(), recv, offset)) } fn gen_store_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, val: Opnd, val_type: Type) { gen_incr_counter(asm, Counter::store_field_count); asm_comment!(asm, "Store field id={} offset={}", id.contents_lossy(), offset); - let recv = asm.load(recv); + let recv = asm.load_mem(recv); asm.store(Opnd::mem(val_type.num_bits(), recv, offset), val); } @@ -1290,7 +1306,7 @@ fn gen_write_barrier(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, val: O // rb_obj_written() does: if (!RB_SPECIAL_CONST_P(val)) { rb_gc_writebarrier(recv, val); } if !val_type.is_immediate() { asm_comment!(asm, "Write barrier"); - let recv = asm.load(recv); + let recv = asm.load_mem(recv); // Create a result block that all paths converge to let hir_block_id = asm.current_block().hir_block_id; @@ -1487,7 +1503,9 @@ fn gen_send_iseq_direct( // `unspecializable_call_type`. let block_handler = blockiseq.map(|b| gen_block_handler_specval(asm, b)); - let (frame_type, specval) = if VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(cme) } { + let callee_is_bmethod = VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(cme) }; + + let (frame_type, specval) = if callee_is_bmethod { // Extract EP from the Proc instance let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; let proc = unsafe { rb_jit_get_proc_ptr(procv) }; @@ -1554,7 +1572,14 @@ fn gen_send_iseq_direct( c_args.push(recv); c_args.extend(&args); if needs_block { - c_args.push(specval); + if callee_is_bmethod { + // For bmethods, specval is the captured EP, not the block handler. + // The block param needs nil (no block) or a Proc value. + assert!(block_handler.is_none(), "at the moment, HIR builder never emits a direct send for a to-bmethod send-with-literal-block"); + c_args.push(Qnil.into()); + } else { + c_args.push(specval); + } } let num_optionals_passed = if params.flags.has_opt() != 0 { @@ -1750,8 +1775,8 @@ fn gen_array_aref( array: Opnd, index: Opnd, ) -> lir::Opnd { - let unboxed_idx = asm.load(index); - let array = asm.load(array); + let unboxed_idx = asm.load_mem(index); + let array = asm.load_mem(array); let array_ptr = gen_array_ptr(asm, array); let elem_offset = asm.lshift(unboxed_idx, Opnd::UImm(SIZEOF_VALUE.trailing_zeros() as u64)); let elem_ptr = asm.add(array_ptr, elem_offset); @@ -1764,8 +1789,8 @@ fn gen_array_aset( index: Opnd, val: Opnd, ) { - let unboxed_idx = asm.load(index); - let array = asm.load(array); + let unboxed_idx = asm.load_mem(index); + let array = asm.load_mem(array); let array_ptr = gen_array_ptr(asm, array); let elem_offset = asm.lshift(unboxed_idx, Opnd::UImm(SIZEOF_VALUE.trailing_zeros() as u64)); let elem_ptr = asm.add(array_ptr, elem_offset); @@ -1778,7 +1803,7 @@ fn gen_array_pop(asm: &mut Assembler, array: Opnd, state: &FrameState) -> lir::O } fn gen_array_length(asm: &mut Assembler, array: Opnd) -> lir::Opnd { - let array = asm.load(array); + let array = asm.load_mem(array); let flags = Opnd::mem(VALUE_BITS, array, RUBY_OFFSET_RBASIC_FLAGS); let embedded_len = asm.and(flags, (RARRAY_EMBED_LEN_MASK as u64).into()); let embedded_len = asm.rshift(embedded_len, (RARRAY_EMBED_LEN_SHIFT as u64).into()); @@ -1943,10 +1968,7 @@ fn gen_is_a(jit: &mut JITState, asm: &mut Assembler, obj: Opnd, class: Opnd) -> args: vec![v], }); - let val = match obj { - Opnd::Reg(_) | Opnd::VReg { .. } => obj, - _ => asm.load(obj), - }; + let val = asm.load_mem(obj); // Immediate → definitely not String/Array/Hash asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); @@ -2236,7 +2258,7 @@ fn gen_box_bool(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { fn gen_box_fixnum(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd { // Load the value, then test for overflow and tag it - let val = asm.load(val); + let val = asm.load_mem(val); let shifted = asm.lshift(val, Opnd::UImm(1)); asm.jo(jit, side_exit(jit, state, BoxFixnumOverflow)); asm.or(shifted, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)) @@ -2299,10 +2321,7 @@ fn gen_has_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, ty: Typ // If val isn't in a register, load it to use it as the base of Opnd::mem later. // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) - let val = match val { - Opnd::Reg(_) | Opnd::VReg { .. } => val, - _ => asm.load(val), - }; + let val = asm.load_mem(val); // Immediate → definitely not the class asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); @@ -2364,10 +2383,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard // If val isn't in a register, load it to use it as the base of Opnd::mem later. // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) - let val = match val { - Opnd::Reg(_) | Opnd::VReg { .. } => val, - _ => asm.load(val), - }; + let val = asm.load_mem(val); // Check if it's a special constant let side_exit = side_exit(jit, state, GuardType(guard_type)); @@ -2394,10 +2410,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard asm.cmp(val, Qfalse.into()); asm.je(jit, side.clone()); - let val = match val { - Opnd::Reg(_) | Opnd::VReg { .. } => val, - _ => asm.load(val), - }; + let val = asm.load_mem(val); let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS)); let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64)); @@ -2414,10 +2427,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard asm.cmp(val, Qfalse.into()); asm.je(jit, side.clone()); - let val = match val { - Opnd::Reg(_) | Opnd::VReg { .. } => val, - _ => asm.load(val), - }; + let val = asm.load_mem(val); let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS)); let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64)); @@ -2455,10 +2465,7 @@ fn gen_guard_type_not(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, g asm.cmp(val, Qfalse.into()); asm.je(jit, cont_edge()); - let val = match val { - Opnd::Reg(_) | Opnd::VReg { .. } => val, - _ => asm.load(val), - }; + let val = asm.load_mem(val); let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS)); let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64)); @@ -3102,7 +3109,7 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec // Generate RSTRING_PTR fn get_string_ptr(asm: &mut Assembler, string: Opnd) -> Opnd { asm_comment!(asm, "get string pointer for embedded or heap"); - let string = asm.load(string); + let string = asm.load_mem(string); let flags = Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RBASIC_FLAGS); asm.test(flags, (RSTRING_NOEMBED as u64).into()); let heap_ptr = asm.load(Opnd::mem( @@ -3168,6 +3175,15 @@ fn aligned_stack_bytes(num_slots: usize) -> usize { } impl Assembler { + /// Emits a load for memory based operands and returns a vreg, + /// otherwise returns recv. + fn load_mem(&mut self, recv: Opnd) -> Opnd { + match recv { + Opnd::VReg { .. } | Opnd::Reg(_) => recv, + _ => self.load(recv), + } + } + /// Make a C call while marking the start and end positions for IseqCall fn ccall_with_iseq_call(&mut self, fptr: *const u8, opnds: Vec, iseq_call: &IseqCallRef) -> Opnd { // We need to create our own branch rc objects so that we can move the closure below @@ -3248,6 +3264,34 @@ impl IseqCall { } } +type PerfSymbol = Rc>>; + +/// Mark the start of a perf symbol range via pos_marker. +/// Returns a handle to pass to perf_symbol_range_end. +pub fn perf_symbol_range_start(asm: &mut Assembler, symbol_name: &str) -> PerfSymbol { + let symbol_name = symbol_name.to_string(); + let perf_symbol: PerfSymbol = Rc::new(RefCell::new(None)); + let current = perf_symbol.clone(); + asm.pos_marker(move |start, _| { + let mut current = current.borrow_mut(); + assert!(current.is_none(), "perf symbol range already open"); + *current = Some((start, symbol_name.clone())); + }); + perf_symbol +} + +/// Mark the end of a perf symbol range via pos_marker. +pub fn perf_symbol_range_end(asm: &mut Assembler, perf_symbol: &PerfSymbol) { + let current = perf_symbol.clone(); + asm.pos_marker(move |end, cb| { + if let Some((start, name)) = current.borrow_mut().take() { + let start_addr = start.raw_addr(cb); + let code_size = end.raw_addr(cb) - start_addr; + register_with_perf(name, start_addr, code_size); + } + }); +} + #[cfg(test)] #[path = "codegen_tests.rs"] mod tests; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 9c409e36fb955e..d4ac6929345531 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -2228,7 +2228,7 @@ pub enum ValidationError { } /// Check if we can do a direct send to the given iseq with the given args. -fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId], blockiseq: Option) -> bool { +fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId]) -> bool { let mut can_send = true; let mut count_failure = |counter| { can_send = false; @@ -2236,16 +2236,15 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq }; let params = unsafe { iseq.params() }; - let caller_has_literal_block: bool = blockiseq.is_some(); let callee_has_block_param = 0 != params.flags.has_block(); + let caller_passes_block_arg = (unsafe { rb_vm_ci_flag(ci) } & VM_CALL_ARGS_BLOCKARG) != 0; use Counter::*; if 0 != params.flags.has_rest() { count_failure(complex_arg_pass_param_rest) } if 0 != params.flags.has_post() { count_failure(complex_arg_pass_param_post) } - if callee_has_block_param && !caller_has_literal_block - { count_failure(complex_arg_pass_param_block) } if 0 != params.flags.forwardable() { count_failure(complex_arg_pass_param_forwardable) } - + if callee_has_block_param && caller_passes_block_arg + { count_failure(complex_arg_pass_param_block) } if 0 != params.flags.has_kwrest() { count_failure(complex_arg_pass_param_kwrest) } if !can_send { @@ -3570,7 +3569,7 @@ impl Function { // Only specialize positional-positional calls // TODO(max): Handle other kinds of parameter passing let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), blockiseq) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { self.push_insn_id(block, insn_id); continue; } @@ -3609,7 +3608,7 @@ impl Function { let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), None) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { self.push_insn_id(block, insn_id); continue; } @@ -3976,7 +3975,7 @@ impl Function { // If not, we can't do direct dispatch. let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; // TODO: pass Option to can_direct_send when we start specializing `super { ... }`. - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice()) { self.push_insn_id(block, insn_id); self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); continue; @@ -5845,33 +5844,6 @@ impl Function { Ok(()) } - // Validate that every instruction use is from a block-local definition, which is a temporary - // constraint until we get a global register allocator. - // TODO(tenderworks): Remove this - fn temporary_validate_block_local_definite_assignment(&self) -> Result<(), ValidationError> { - for block in self.rpo() { - let mut assigned = InsnSet::with_capacity(self.insns.len()); - for ¶m in &self.blocks[block.0].params { - assigned.insert(param); - } - // Check that each instruction's operands are assigned - for &insn_id in &self.blocks[block.0].insns { - let insn_id = self.union_find.borrow().find_const(insn_id); - self.insns[insn_id.0].try_for_each_operand(|operand| { - let operand = self.union_find.borrow().find_const(operand); - if !assigned.get(operand) { - return Err(ValidationError::OperandNotDefined(block, insn_id, operand)); - } - Ok(()) - })?; - if self.insns[insn_id.0].has_output() { - assigned.insert(insn_id); - } - } - } - Ok(()) - } - /// Checks that each instruction('s representative) appears only once in the CFG. fn validate_insn_uniqueness(&self) -> Result<(), ValidationError> { let mut seen = InsnSet::with_capacity(self.insns.len()); @@ -6189,7 +6161,6 @@ impl Function { pub fn validate(&self) -> Result<(), ValidationError> { self.validate_block_terminators_and_jumps()?; self.validate_definite_assignment()?; - self.temporary_validate_block_local_definite_assignment()?; self.validate_insn_uniqueness()?; self.validate_types()?; Ok(()) @@ -6756,7 +6727,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { // Keep compiling blocks until the queue becomes empty let mut visited = HashSet::new(); let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; - while let Some((incoming_state, block, mut insn_idx, mut local_inval)) = queue.pop_front() { + while let Some((incoming_state, mut block, mut insn_idx, mut local_inval)) = queue.pop_front() { // Compile each block only once if visited.contains(&block) { continue; } visited.insert(block); @@ -7927,8 +7898,46 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledYARVInsn(opcode) }); break; // End the block } - let result = fun.push_insn(block, Insn::GetIvar { self_val: self_param, id, ic, state: exit_id }); - state.stack_push(result); + if let Some(summary) = fun.polymorphic_summary(&profiles, self_param, exit_state.insn_idx) { + self_param = fun.push_insn(block, Insn::GuardType { val: self_param, guard_type: types::HeapBasicObject, state: exit_id }); + let shape = fun.load_shape(block, self_param); + let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx)); + let join_param = fun.push_insn(join_block, Insn::Param); + // Dedup by expected shape so objects with different classes but the same shape can share code + // TODO(max): De-duplicate further by checking ivar offsets to allow + // different shapes with the same ivar layout to share code + let mut seen_shapes = Vec::with_capacity(summary.buckets().len()); + for &profiled_type in summary.buckets() { + // End of the buckets + if profiled_type.is_empty() { break; } + // Instance variable lookups on immediate values are always nil; don't bother + if profiled_type.flags().is_immediate() { continue; } + let expected_shape = profiled_type.shape(); + assert!(expected_shape.is_valid()); + if seen_shapes.contains(&expected_shape) { continue; } + seen_shapes.push(expected_shape); + let expected_shape_const = fun.push_insn(block, Insn::Const { val: Const::CShape(expected_shape) }); + let has_shape = fun.push_insn(block, Insn::IsBitEqual { left: shape, right: expected_shape_const }); + let iftrue_block = fun.new_block(insn_idx); + let target = BranchEdge { target: iftrue_block, args: vec![] }; + fun.push_insn(block, Insn::IfTrue { val: has_shape, target }); + let result = fun.load_ivar(iftrue_block, self_param, profiled_type, id, exit_id); + fun.push_insn(iftrue_block, Insn::Jump(BranchEdge { target: join_block, args: vec![result] })); + } + // In the fallthrough case, do a generic interpreter getivar and then join. + let result = fun.push_insn(block, Insn::GetIvar { self_val: self_param, id, ic, state: exit_id }); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: vec![result] })); + state.stack_push(join_param); + // Continue compilation from the join block at the next instruction. + // Make a copy of the current state without the args (pop the receiver + // and push the result) because we just use the locals/stack sizes to + // make the right number of Params + block = join_block; + } else { + // Possibly monomorphic case; handled in optimize_getivar + let result = fun.push_insn(block, Insn::GetIvar { self_val: self_param, id, ic, state: exit_id }); + state.stack_push(result); + } } YARVINSN_setinstancevariable => { let id = ID(get_arg(pc, 0).as_u64()); @@ -8761,17 +8770,6 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); } - #[test] - fn not_defined_within_bb_block_local() { - let mut function = Function::new(std::ptr::null()); - let entry = function.entry_block; - // Create an instruction without making it belong to anything. - let dangling = function.new_insn(Insn::Const{val: Const::CBool(true)}); - let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: dangling, state: InsnId(0usize) }); - function.seal_entries(); - assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); - } - #[test] fn using_non_output_insn() { let mut function = Function::new(std::ptr::null()); @@ -8784,18 +8782,6 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); } - #[test] - fn using_non_output_insn_block_local() { - let mut function = Function::new(std::ptr::null()); - let entry = function.entry_block; - let const_ = function.push_insn(function.entry_block, Insn::Const{val: Const::CBool(true)}); - // Ret is a non-output instruction. - let ret = function.push_insn(function.entry_block, Insn::Return { val: const_ }); - let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: ret, state: InsnId(0usize) }); - function.seal_entries(); - assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); - } - #[test] fn not_dominated_by_diamond() { // This tests that one branch is missing a definition which fails. diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index c0c89e35d46eb0..1e28ba77754ae0 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -4514,7 +4514,10 @@ mod hir_opt_tests { v12:NilClass = Const Value(nil) PatchPoint MethodRedefined(Hash@0x1008, new@0x1009, cme:0x1010) v46:HashExact = ObjectAllocClass Hash:VALUE(0x1008) - v19:BasicObject = Send v46, :initialize # SendFallbackReason: Complex argument passing + v47:Fixnum[0] = Const Value(0) + PatchPoint NoSingletonClass(Hash@0x1008) + PatchPoint MethodRedefined(Hash@0x1008, initialize@0x1038, cme:0x1040) + v51:BasicObject = SendDirect v46, 0x1068, :initialize (0x1078), v47 CheckInterrupts Return v46 "); @@ -7443,7 +7446,134 @@ mod hir_opt_tests { } #[test] - fn test_dont_optimize_getivar_polymorphic() { + fn test_optimize_getivar_polymorphic() { + set_call_threshold(3); + eval(r#" + class C + def foo_then_bar + @foo = 1 + @bar = 2 + end + + def bar_then_foo + 1000.times { |i| instance_variable_set(:"@v#{i}", i) } + @bar = 3 + @foo = 4 + end + + def foo = @foo + 1 + end + + O1 = C.new + O1.foo_then_bar + O2 = C.new + O2.bar_then_foo + O1.foo + O2.foo + "#); + assert_snapshot!(hir_string_proc("C.instance_method(:foo)"), @" + fn foo@:14: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + PatchPoint SingleRactorMode + v11:HeapBasicObject = GuardType v6, HeapBasicObject + v12:CShape = LoadField v11, :_shape_id@0x1000 + v14:CShape[0x1001] = Const CShape(0x1001) + v15:CBool = IsBitEqual v12, v14 + IfTrue v15, bb5() + v20:CShape[0x1002] = Const CShape(0x1002) + v21:CBool = IsBitEqual v12, v20 + IfTrue v21, bb6() + v25:BasicObject = GetIvar v11, :@foo + Jump bb4(v25) + bb5(): + v17:CPtr = LoadField v11, :_as_heap@0x1003 + v18:BasicObject = LoadField v17, :@foo@0x1004 + Jump bb4(v18) + bb6(): + v23:BasicObject = LoadField v11, :@foo@0x1003 + Jump bb4(v23) + bb4(v13:BasicObject): + v28:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1008, +@0x1010, cme:0x1018) + v39:Fixnum = GuardType v13, Fixnum + v40:Fixnum = FixnumAdd v39, v28 + CheckInterrupts + Return v40 + "); + } + + #[test] + fn test_optimize_getivar_polymorphic_with_subclass() { + set_call_threshold(3); + eval(r#" + class C + def initialize + @foo = 3 + end + + def foo = @foo + 1 + end + + class D < C + def initialize + super + @bar = 4 + end + end + + O1 = C.new + O2 = D.new + O1.foo + O2.foo + "#); + assert_snapshot!(hir_string_proc("C.instance_method(:foo)"), @" + fn foo@:7: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + PatchPoint SingleRactorMode + v11:HeapBasicObject = GuardType v6, HeapBasicObject + v12:CShape = LoadField v11, :_shape_id@0x1000 + v14:CShape[0x1001] = Const CShape(0x1001) + v15:CBool = IsBitEqual v12, v14 + IfTrue v15, bb5() + v19:CShape[0x1002] = Const CShape(0x1002) + v20:CBool = IsBitEqual v12, v19 + IfTrue v20, bb6() + v24:BasicObject = GetIvar v11, :@foo + Jump bb4(v24) + bb5(): + v17:BasicObject = LoadField v11, :@foo@0x1003 + Jump bb4(v17) + bb6(): + v22:BasicObject = LoadField v11, :@foo@0x1003 + Jump bb4(v22) + bb4(v13:BasicObject): + v27:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1008, +@0x1010, cme:0x1018) + v38:Fixnum = GuardType v13, Fixnum + v39:Fixnum = FixnumAdd v38, v27 + CheckInterrupts + Return v39 + "); + } + + #[test] + fn test_dont_optimize_attr_accessor_polymorphic() { set_call_threshold(3); eval(" class C @@ -7738,6 +7868,67 @@ mod hir_opt_tests { "); } + #[test] + fn test_send_iseq_with_block_param_no_block() { + let result = eval(" + def foo(&blk) + blk ? blk.call : 42 + end + def test = foo + test + test + "); + assert_eq!(VALUE::fixnum_from_usize(42), result); + assert_snapshot!(hir_string("test"), @" + fn test@:5: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_send_bmethod_with_block_param_no_block() { + let result = eval(" + define_method(:foo) { |&blk| + blk ? blk.call : 42 + } + def test = foo + test + test + "); + assert_eq!(VALUE::fixnum_from_usize(42), result); + assert_snapshot!(hir_string("test"), @" + fn test@:5: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + PatchPoint SingleRactorMode + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v19:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] + v20:BasicObject = SendDirect v19, 0x1038, :foo (0x1048) + CheckInterrupts + Return v20 + "); + } + #[test] fn test_inline_attr_reader_constant() { eval(" diff --git a/zjit/src/lib.rs b/zjit/src/lib.rs index a79989c9125e02..50a48399c2c2d1 100644 --- a/zjit/src/lib.rs +++ b/zjit/src/lib.rs @@ -32,3 +32,14 @@ mod gc; mod payload; mod json; mod ttycolors; + +/// Pull in YJIT's symbols for linking the test binary in `make zjit-test`. The test binary builds +/// ZJIT symbols and they should take precendence over the ones built for miniruby, so libminiruby +/// doesn't include any ZJIT code. But, in removing from libminiruby the object which contains all +/// rust code, including ZJIT code, we also remove all YJIT symbols which the rest of libminiruby +/// might request in YJIT+ZJIT configurations. We add back the YJIT symbols here. +/// +/// Only relevant for YJIT+ZJIT configurations, but building YJIT is fast, so always do it for the +/// test binary for simplicity. +#[cfg(test)] +use yjit as _; diff --git a/zjit/src/options.rs b/zjit/src/options.rs index 5da5fd2d706a5d..99ccbcaf8b851c 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -6,6 +6,15 @@ use crate::cruby::*; use crate::stats::Counter; use std::collections::HashSet; +/// Type of symbols to dump into /tmp/perf-{pid}.map +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum PerfMap { + /// Dump one symbol per ISEQ + ISEQ, + /// Dump one symbol per HIR instruction + HIR, +} + /// Default --zjit-num-profiles const DEFAULT_NUM_PROFILES: NumProfiles = 5; pub type NumProfiles = u16; @@ -89,7 +98,7 @@ pub struct Options { pub trace_side_exits_sample_interval: usize, /// Dump code map to /tmp for performance profilers. - pub perf: bool, + pub perf: Option, /// List of ISEQs that can be compiled, identified by their iseq_get_location() pub allowed_iseqs: Option>, @@ -118,7 +127,7 @@ impl Default for Options { dump_disasm: None, trace_side_exits: None, trace_side_exits_sample_interval: 0, - perf: false, + perf: None, allowed_iseqs: None, log_compiled_iseqs: None, } @@ -141,7 +150,8 @@ pub const ZJIT_OPTIONS: &[(&str, &str)] = &[ "Collect ZJIT stats (=file to write to a file)."), ("--zjit-disable", "Disable ZJIT for lazily enabling it with RubyVM::ZJIT.enable."), - ("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."), + ("--zjit-perf[=iseq|hir]", + "Dump symbols for Linux perf /tmp/perf-{}.map (default: iseq)."), ("--zjit-log-compiled-iseqs=path", "Log compiled ISEQs to the file. The file will be truncated."), ("--zjit-trace-exits[=counter]", @@ -452,7 +462,8 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } } - ("perf", "") => options.perf = true, + ("perf", "" | "iseq") => options.perf = Some(PerfMap::ISEQ), + ("perf", "hir") => options.perf = Some(PerfMap::HIR), ("allowed-iseqs", _) if !opt_val.is_empty() => options.allowed_iseqs = Some(parse_jit_list(opt_val)), ("log-compiled-iseqs", _) if !opt_val.is_empty() => { diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 66f418067cfcc9..28380f140b75ae 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -4,7 +4,8 @@ use std::time::Instant; use std::sync::atomic::Ordering; use crate::options::OPTIONS; -#[cfg(feature = "stats_allocator")] +// test binaries always bring it in as a cargo dependency +#[cfg(all(feature = "stats_allocator", not(test)))] #[path = "../../jit/src/lib.rs"] mod jit; diff --git a/zjit/zjit.mk b/zjit/zjit.mk index eecb6fdecf03d1..dad4ece932cc56 100644 --- a/zjit/zjit.mk +++ b/zjit/zjit.mk @@ -133,7 +133,7 @@ zjit-test-rr: libminiruby.a libminiruby.a: miniruby$(EXEEXT) $(ECHO) linking static-library $@ $(Q) $(RM) $@ - $(Q) $(AR) $(ARFLAGS) $@ $(MINIOBJS) $(COMMONOBJS) + $(Q) $(AR) $(ARFLAGS) $@ $(MINIOBJS) $(COMMONOBJS:$(RUST_LIBOBJ)=) libminiruby: libminiruby.a