From 48dd4b14a1541aa867963ccb33f41f4dc64ce59b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 2 Apr 2026 14:54:24 -0700 Subject: [PATCH 1/5] ZJIT: Deduplicate redundant GuardType instructions in fold_constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two GuardType instructions in the same basic block guard the same value with the same (or narrower) type, the second guard is redundant. For example, `(n - 1) + (n - 2)` generates two `GuardType n, Fixnum` — one for each subtraction — but after the first guard succeeds, the second is guaranteed to succeed as well. The new logic in fold_constants tracks every GuardType emitted so far in the current block. If a later GuardType's (val, guard_type) pair is already covered (i.e. the earlier guard's type is the same as or a subtype of the later guard_type), the later guard is replaced with the earlier result via make_equal_to. --- zjit/src/hir.rs | 23 +++++++++++++++ zjit/src/hir/opt_tests.rs | 59 ++++++++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index fa911fc41da470..4e638cb9dae443 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -5239,6 +5239,10 @@ impl Function { for block in self.rpo() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); let mut new_insns = vec![]; + // Track guards seen so far in this block: (val, guard_type, result_insn_id). + // When we encounter a GuardType whose (val, guard_type) pair is already covered + // by a previous guard, we can eliminate it by reusing the earlier result. + let mut seen_guards: Vec<(InsnId, Type, InsnId)> = vec![]; for insn_id in old_insns { let replacement_id = match self.find(insn_id) { Insn::GuardType { val, guard_type, .. } if self.is_a(val, guard_type) => { // Don't bother re-inferring the type of val; we already know it. 
continue; } + // Deduplicate GuardType: if we already guarded the same val with a + // type that is the same or narrower, the new guard is redundant. + // e.g. if we already proved val is Fixnum, a later Fixnum or + // BasicObject guard on the same val is guaranteed to pass. + Insn::GuardType { val, guard_type, .. } => { + let mut found = None; + for &(prev_val, prev_type, prev_result) in &seen_guards { + if prev_val == val && prev_type.is_subtype(guard_type) { + found = Some(prev_result); + break; + } + } + if let Some(prev_result) = found { + self.make_equal_to(insn_id, prev_result); + continue; + } + seen_guards.push((val, guard_type, insn_id)); + insn_id + } Insn::LoadField { recv, offset, return_type, .. } if return_type.is_subtype(types::BasicObject) && u32::try_from(offset).is_ok() => { let offset = (offset as u32).to_usize(); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 5bef039ed123f1..afdafe74e7ef6e 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -224,8 +224,6 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Integer@0x1008, *@0x1010, cme:0x1018) v35:Fixnum = GuardType v10, Fixnum v44:Fixnum[0] = Const Value(0) - v21:Fixnum[0] = Const Value(0) - v39:Fixnum = GuardType v10, Fixnum v45:Fixnum[0] = Const Value(0) PatchPoint MethodRedefined(Integer@0x1008, +@0x1040, cme:0x1048) v46:Fixnum[0] = Const Value(0) @@ -1633,8 +1631,7 @@ mod hir_opt_tests { v23:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] v24:BasicObject = SendDirect v23, 0x1038, :foo (0x1048) PatchPoint MethodRedefined(Object@0x1000, bar@0x1050, cme:0x1058) - v26:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] - v27:BasicObject = SendDirect v26, 0x1038, :bar (0x1048) + v27:BasicObject = SendDirect v23, 0x1038, :bar (0x1048) CheckInterrupts Return v27 "); @@ -1747,8 +1744,7 @@ mod hir_opt_tests { v16:Fixnum[20] 
= Const Value(20) v18:Fixnum[30] = Const Value(30) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) - v47:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] - v48:BasicObject = SendDirect v47, 0x1038, :target (0x1048), v14, v16, v18 + v48:BasicObject = SendDirect v44, 0x1038, :target (0x1048), v14, v16, v18 v24:Fixnum[10] = Const Value(10) v26:Fixnum[20] = Const Value(20) v28:Fixnum[30] = Const Value(30) @@ -3901,8 +3897,7 @@ mod hir_opt_tests { v24:Fixnum[4] = Const Value(4) v26:Fixnum[3] = Const Value(3) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v41:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] - v42:BasicObject = SendDirect v41, 0x1038, :foo (0x1048), v20, v22, v26, v24 + v42:BasicObject = SendDirect v37, 0x1038, :foo (0x1048), v20, v22, v26, v24 v30:ArrayExact = NewArray v38, v42 CheckInterrupts Return v30 @@ -3939,8 +3934,7 @@ mod hir_opt_tests { v22:Fixnum[40] = Const Value(40) v24:Fixnum[30] = Const Value(30) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v41:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] - v42:BasicObject = SendDirect v41, 0x1038, :foo (0x1048), v18, v20, v24, v22 + v42:BasicObject = SendDirect v37, 0x1038, :foo (0x1048), v18, v20, v24, v22 v28:ArrayExact = NewArray v38, v42 CheckInterrupts Return v28 @@ -3975,8 +3969,7 @@ mod hir_opt_tests { v20:Fixnum[30] = Const Value(30) v22:Fixnum[6] = Const Value(6) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) - v51:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] - v52:BasicObject = SendDirect v51, 0x1038, :target (0x1048), v16, v18, v20, v22 + v52:BasicObject = SendDirect v48, 0x1038, :target (0x1048), v16, v18, v20, v22 v27:Fixnum[10] = Const 
Value(10) v29:Fixnum[20] = Const Value(20) v31:Fixnum[30] = Const Value(30) @@ -6527,7 +6520,6 @@ mod hir_opt_tests { v22:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] v28:StaticSymbol[:b] = Const Value(VALUE(0x1038)) PatchPoint MethodRedefined(Object@0x1000, one@0x1040, cme:0x1048) - v26:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] CheckInterrupts Return v28 "); @@ -11812,7 +11804,6 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1008) PatchPoint MethodRedefined(String@0x1008, ==@0x1010, cme:0x1018) v26:StringExact = GuardType v10, StringExact - v27:String = GuardType v10, String v29:TrueClass = Const Value(true) CheckInterrupts Return v29 @@ -11842,7 +11833,6 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1008) PatchPoint MethodRedefined(String@0x1008, ===@0x1010, cme:0x1018) v25:StringExact = GuardType v10, StringExact - v26:String = GuardType v10, String v28:TrueClass = Const Value(true) CheckInterrupts Return v28 @@ -12254,7 +12244,6 @@ mod hir_opt_tests { PatchPoint MethodRedefined(String@0x1008, !=@0x1010, cme:0x1018) v26:StringExact = GuardType v10, StringExact PatchPoint MethodRedefined(String@0x1008, ==@0x1040, cme:0x1048) - v30:String = GuardType v10, String v35:TrueClass = Const Value(true) v32:TrueClass = Const Value(true) v33:CBool = IsBitNotEqual v35, v32 @@ -15295,4 +15284,42 @@ mod hir_opt_tests { Return v35 "); } + + #[test] + fn test_dedup_guard_type() { + // Two subtractions on the same Fixnum operand `n` each require a + // GuardType n, Fixnum. The second guard is redundant and should be + // eliminated by fold_constants. 
+ eval(" + def test(n) + (n - 1) + (n - 2) + end + test 1; test 2 + "); + assert_snapshot!(hir_string("test"), @" + fn test@:3: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:CPtr = LoadSP + v3:BasicObject = LoadField v2, :n@0x1000 + Jump bb3(v1, v3) + bb2(): + EntryPoint JIT(0) + v6:BasicObject = LoadArg :self@0 + v7:BasicObject = LoadArg :n@1 + Jump bb3(v6, v7) + bb3(v9:BasicObject, v10:BasicObject): + v15:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1008, -@0x1010, cme:0x1018) + v35:Fixnum = GuardType v10, Fixnum + v36:Fixnum = FixnumSub v35, v15 + v21:Fixnum[2] = Const Value(2) + v40:Fixnum = FixnumSub v35, v21 + PatchPoint MethodRedefined(Integer@0x1008, +@0x1040, cme:0x1048) + v43:Fixnum = FixnumAdd v36, v40 + CheckInterrupts + Return v43 + "); + } } From 153ab6be11c68dada31375c98568381a6ad995f5 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 3 Apr 2026 14:41:29 -0700 Subject: [PATCH 2/5] ZJIT: Simplify guard dedup with Iterator::find --- zjit/src/hir.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4e638cb9dae443..75949185fb1d70 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -5255,14 +5255,9 @@ impl Function { // e.g. if we already proved val is Fixnum, a later Fixnum or // BasicObject guard on the same val is guaranteed to pass. Insn::GuardType { val, guard_type, .. 
} => { - let mut found = None; - for &(prev_val, prev_type, prev_result) in &seen_guards { - if prev_val == val && prev_type.is_subtype(guard_type) { - found = Some(prev_result); - break; - } - } - if let Some(prev_result) = found { + if let Some(&(_, _, prev_result)) = seen_guards.iter().find( + |&&(prev_val, prev_type, _)| prev_val == val && prev_type.is_subtype(guard_type) + ) { self.make_equal_to(insn_id, prev_result); continue; } From 0fde80f0260e85a0b38a90d00134101a29a03487 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 3 Apr 2026 14:40:30 -0700 Subject: [PATCH 3/5] ZJIT: Add TODO to subsume guard dedup into GVN --- zjit/src/hir.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 75949185fb1d70..e79dbfb83bd361 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -5254,6 +5254,7 @@ impl Function { // type that is the same or narrower, the new guard is redundant. // e.g. if we already proved val is Fixnum, a later Fixnum or // BasicObject guard on the same val is guaranteed to pass. + // TODO: Move into global value numbering Insn::GuardType { val, guard_type, .. 
} => { if let Some(&(_, _, prev_result)) = seen_guards.iter().find( |&&(prev_val, prev_type, _)| prev_val == val && prev_type.is_subtype(guard_type) From 83447d558644a6c79a74cdb26b5c3043e6abeb9f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 26 Mar 2026 07:11:51 -0700 Subject: [PATCH 4/5] ZJIT: Recompile getivar on guard_shape_failure --- zjit/src/backend/lir.rs | 14 ++++---- zjit/src/codegen.rs | 74 +++++++++++++++++++++++++++------------ zjit/src/hir.rs | 62 ++++++++++++++++++++++++++------ zjit/src/hir/opt_tests.rs | 59 +++++++++++++++++++++++++++++++ zjit/src/profile.rs | 21 +++++++++++ 5 files changed, 190 insertions(+), 40 deletions(-) diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 624fcd0389f6f2..e8cf64153ba0f5 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -557,12 +557,14 @@ pub struct SideExit { pub recompile: Option, } -/// Arguments for the no-profile-send recompile callback. +/// Arguments for the recompile callback on side exit. +/// Used for both no-profile sends (argc >= 0) and shape guard failures (argc = -1). #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct SideExitRecompile { pub iseq: Opnd, pub insn_idx: u32, - /// Number of arguments, not including the receiver. + /// Number of arguments (not including receiver) for send profiling. + /// -1 means profile self from CFP for shape guard exits. pub argc: i32, } @@ -2671,7 +2673,7 @@ impl Assembler if let Some(recompile) = &exit.recompile { if cfg!(feature = "runtime_checks") { // Clear jit_return to fully materialize the frame. This must happen - // before any C call in the exit path (e.g. no_profile_send_recompile) + // before any C call in the exit path (e.g. exit_recompile) // because that C call can trigger GC, which walks the stack and would // hit the CFP_JIT_RETURN assertion if jit_return still holds the // runtime_checks poison value (JIT_RETURN_POISON). 
@@ -2679,9 +2681,9 @@ impl Assembler asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), 0.into()); } - use crate::codegen::no_profile_send_recompile; - asm_comment!(asm, "profile and maybe recompile for no-profile send"); - asm_ccall!(asm, no_profile_send_recompile, + use crate::codegen::exit_recompile; + asm_comment!(asm, "profile and maybe recompile"); + asm_ccall!(asm, exit_recompile, EC, recompile.iseq, Opnd::UImm(recompile.insn_idx as u64), diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 7394d3d96adf9a..089df5eb9833e5 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -518,7 +518,7 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func Insn::InvokeBuiltin { .. } => SideExitReason::UnhandledHIRInvokeBuiltin, _ => SideExitReason::UnhandledHIRUnknown(insn_id), }; - gen_side_exit(&mut jit, &mut asm, &reason, &None, &function.frame_state(last_snapshot)); + gen_side_exit(&mut jit, &mut asm, &reason, None, &function.frame_state(last_snapshot)); // Don't bother generating code after a side-exit. We won't run it. // TODO(max): Generate ud2 or equivalent. break; @@ -689,7 +689,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::HasType { val, expected } => gen_has_type(jit, asm, opnd!(val), *expected), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), - &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), + &Insn::GuardBitEquals { val, expected, reason, state, recompile } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, recompile, &function.frame_state(state)), &Insn::GuardAnyBitSet { val, mask, reason, state, .. 
} => gen_guard_any_bit_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), &Insn::GuardNoBitsSet { val, mask, reason, state, .. } => gen_guard_no_bits_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), @@ -717,7 +717,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SetClassVar { id, val, ic, state } => no_output!(gen_setclassvar(jit, asm, *id, opnd!(val), *ic, &function.frame_state(*state))), Insn::SetIvar { self_val, id, ic, val, state } => no_output!(gen_setivar(jit, asm, opnd!(self_val), *id, *ic, opnd!(val), &function.frame_state(*state))), Insn::FixnumBitCheck { val, index } => gen_fixnum_bit_check(asm, opnd!(val), *index), - Insn::SideExit { state, reason, recompile } => no_output!(gen_side_exit(jit, asm, reason, recompile, &function.frame_state(*state))), + Insn::SideExit { state, reason, recompile } => no_output!(gen_side_exit(jit, asm, reason, *recompile, &function.frame_state(*state))), Insn::PutSpecialObject { value_type } => gen_putspecialobject(asm, *value_type), Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state)), Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state)), @@ -1238,14 +1238,8 @@ fn gen_setglobal(jit: &mut JITState, asm: &mut Assembler, id: ID, val: Opnd, sta } /// Side-exit into the interpreter -fn gen_side_exit(jit: &mut JITState, asm: &mut Assembler, reason: &SideExitReason, recompile: &Option, state: &FrameState) { - let mut exit = build_side_exit(jit, state); - exit.recompile = recompile.map(|argc| SideExitRecompile { - iseq: Opnd::Value(VALUE::from(jit.iseq)), - insn_idx: state.insn_idx() as u32, - argc, - }); - asm.jmp(Target::SideExit { exit, reason: *reason }); +fn 
gen_side_exit(jit: &mut JITState, asm: &mut Assembler, reason: &SideExitReason, recompile: Option, state: &FrameState) { + asm.jmp(side_exit_with_recompile(jit, state, *reason, recompile)); } /// Emit a special object lookup @@ -2637,7 +2631,7 @@ fn gen_guard_type_not(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, g } /// Compile an identity check with a side exit -fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, expected: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { +fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, expected: crate::hir::Const, reason: SideExitReason, recompile: Option, state: &FrameState) -> lir::Opnd { if matches!(reason, SideExitReason::GuardShape(_) ) { gen_incr_counter(asm, Counter::guard_shape_count); } @@ -2648,7 +2642,7 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, _ => panic!("gen_guard_bit_equals: unexpected hir::Const {expected:?}"), }; asm.cmp(val, expected_opnd); - asm.jnz(jit, side_exit(jit, state, reason)); + asm.jnz(jit, side_exit_with_recompile(jit, state, reason, recompile)); val } @@ -2982,6 +2976,18 @@ fn side_exit(jit: &JITState, state: &FrameState, reason: SideExitReason) -> Targ Target::SideExit { exit, reason } } +/// Build a Target::SideExit that optionally triggers exit_recompile on the exit path. +/// When `recompile` is Some(argc), the side exit calls exit_recompile with that argc. 
+fn side_exit_with_recompile(jit: &JITState, state: &FrameState, reason: SideExitReason, recompile: Option) -> Target { + let mut exit = build_side_exit(jit, state); + exit.recompile = recompile.map(|argc| SideExitRecompile { + iseq: Opnd::Value(VALUE::from(jit.iseq)), + insn_idx: state.insn_idx() as u32, + argc, + }); + Target::SideExit { exit, reason } +} + /// Build a side-exit context fn build_side_exit(jit: &JITState, state: &FrameState) -> SideExit { let mut stack = Vec::new(); @@ -3037,26 +3043,48 @@ macro_rules! c_callable { pub(crate) use c_callable; c_callable! { - /// Called from JIT side-exit code when a send instruction had no profile data. This function - /// profiles the receiver and arguments on the stack, then (once enough profiles are gathered) - /// invalidates the current ISEQ version so that the ISEQ will be recompiled with the new - /// profile data on the next call. - pub(crate) fn no_profile_send_recompile(ec: EcPtr, iseq_raw: VALUE, insn_idx: u32, argc: i32) { + /// Called from JIT side-exit code to profile operands and trigger recompilation. + /// For send instructions (argc >= 0): profiles receiver + args from the stack. + /// For shape guard exits (argc == -1): profiles self from the CFP. + /// Once enough profiles are gathered, invalidates the ISEQ for recompilation. + pub(crate) fn exit_recompile(ec: EcPtr, iseq_raw: VALUE, insn_idx: u32, argc: i32) { + // Fast check before taking the VM lock: skip if already invalidated or + // at the version limit. This avoids expensive lock acquisition on every + // shape guard exit after the recompile has already been triggered. 
+ { + let iseq: IseqPtr = iseq_raw.as_iseq(); + let payload = get_or_create_iseq_payload(iseq); + let already_done = payload.versions.last() + .map_or(false, |v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated) + || payload.versions.len() >= max_iseq_versions(); + if already_done { + return; + } + } + with_vm_lock(src_loc!(), || { let iseq: IseqPtr = iseq_raw.as_iseq(); let payload = get_or_create_iseq_payload(iseq); - // Already gathered enough profiles; nothing to do - if payload.profile.done_profiling_at(insn_idx as usize) { + // For no-profile sends, skip if already profiled at this insn_idx. + // For shape guard exits (argc == -1), always re-profile because the + // original YARV profiles were monomorphic but runtime showed new shapes. + if argc >= 0 && payload.profile.done_profiling_at(insn_idx as usize) { return; } with_time_stat(Counter::profile_time_ns, || { let cfp = unsafe { get_ec_cfp(ec) }; - let sp = unsafe { get_cfp_sp(cfp) }; - // Profile the receiver and arguments for this send instruction - let should_recompile = payload.profile.profile_send_at(iseq, insn_idx as usize, sp, argc as usize); + let should_recompile = if argc >= 0 { + let sp = unsafe { get_cfp_sp(cfp) }; + // Profile the receiver and arguments for this send instruction + payload.profile.profile_send_at(iseq, insn_idx as usize, sp, argc as usize) + } else { + // Profile self for shape guard exits (argc == -1) + let self_val = unsafe { get_cfp_self(cfp) }; + payload.profile.profile_self_at(iseq, insn_idx as usize, self_val) + }; // Once we have enough profiles, invalidate and recompile the ISEQ if should_recompile { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e79dbfb83bd361..ec1db4e7977e3e 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -7,7 +7,7 @@ #![allow(clippy::match_like_matches_macro)] use crate::{ backend::lir::C_ARG_OPNDS, - cast::IntoUsize, codegen::local_idx_to_ep_offset, cruby::*, invariants::{self}, payload::{get_or_create_iseq_payload, IseqPayload}, 
options::{debug, get_option, DumpHIR}, state::ZJITState, json::Json, + cast::IntoUsize, codegen::{local_idx_to_ep_offset, max_iseq_versions}, cruby::*, invariants::{self}, payload::{get_or_create_iseq_payload, IseqPayload, IseqStatus}, options::{debug, get_option, DumpHIR}, state::ZJITState, json::Json, state, }; use std::{ @@ -1090,7 +1090,9 @@ pub enum Insn { GuardType { val: InsnId, guard_type: Type, state: InsnId }, GuardTypeNot { val: InsnId, guard_type: Type, state: InsnId }, /// Side-exit if val is not the expected Const. - GuardBitEquals { val: InsnId, expected: Const, reason: SideExitReason, state: InsnId }, + /// `recompile`: if Some(argc), the side exit triggers exit_recompile. + /// argc >= 0 profiles receiver + args from stack; argc == -1 profiles self from CFP. + GuardBitEquals { val: InsnId, expected: Const, reason: SideExitReason, state: InsnId, recompile: Option }, /// Side-exit if (val & mask) == 0 GuardAnyBitSet { val: InsnId, mask: Const, mask_name: Option, reason: SideExitReason, state: InsnId }, /// Side-exit if (val & mask) != 0 @@ -2425,6 +2427,32 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq can_send } +/// Policy that controls how optimization passes generate code. +/// Determined at compile time based on the ISEQ's compilation history. +#[derive(Debug)] +struct CompilePolicy { + /// When true, optimization passes should avoid generating guards that + /// side-exit, and instead use fallback paths (e.g. C calls) on mismatch. + /// Set when this is the final version of an ISEQ after recompilation. + no_side_exits: bool, +} + +impl CompilePolicy { + fn new(iseq: *const rb_iseq_t) -> Self { + // When a previous version was invalidated and we've reached the version + // limit, avoid speculative optimizations that may side-exit. 
+ let no_side_exits = if iseq.is_null() { + false + } else { + let payload = get_or_create_iseq_payload(iseq); + payload.versions.iter().any( + |v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated + ) && payload.versions.len() + 1 >= max_iseq_versions() + }; + Self { no_side_exits } + } +} + /// A [`Function`], which is analogous to a Ruby ISeq, is a control-flow graph of [`Block`]s /// containing instructions. #[derive(Debug)] @@ -2434,6 +2462,8 @@ pub struct Function { /// Whether previously, a function for this ISEQ was invalidated due to /// singleton class creation (violation of NoSingletonClass invariant). was_invalidated_for_singleton_class_creation: bool, + /// Controls code generation strategy for optimization passes. + policy: CompilePolicy, /// The types for the parameters of this function. They are copied to the type /// of entry block params after infer_types() fills Empty to all insn_types. param_types: Vec, @@ -2541,6 +2571,7 @@ impl Function { Function { iseq, was_invalidated_for_singleton_class_creation: false, + policy: CompilePolicy::new(iseq), insns: vec![], insn_types: vec![], union_find: UnionFind::new().into(), @@ -2782,7 +2813,7 @@ impl Function { &HasType { val, expected } => HasType { val: find!(val), expected }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, - &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, + &GuardBitEquals { val, expected, reason, state, recompile } => GuardBitEquals { val: find!(val), expected, reason, state, recompile }, &GuardAnyBitSet { val, mask, mask_name, reason, state } => GuardAnyBitSet { val: find!(val), mask, mask_name, reason, state }, &GuardNoBitsSet { val, mask, mask_name, reason, state } => GuardNoBitsSet { val: find!(val), mask, mask_name, reason, state }, &GuardGreaterEq { left, right, reason, 
state } => GuardGreaterEq { left: find!(left), right: find!(right), reason, state }, @@ -4043,14 +4074,15 @@ impl Function { // Load ep[VM_ENV_DATA_INDEX_ME_CREF] let method_entry = fun.push_insn(block, Insn::LoadField { recv: lep, id: ID!(_ep_method_entry), offset: SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, return_type: types::RubyValue }); // Guard that it matches the expected CME - fun.push_insn(block, Insn::GuardBitEquals { val: method_entry, expected: Const::Value(current_cme.into()), reason: SideExitReason::GuardSuperMethodEntry, state }); + fun.push_insn(block, Insn::GuardBitEquals { val: method_entry, expected: Const::Value(current_cme.into()), reason: SideExitReason::GuardSuperMethodEntry, state, recompile: None }); let block_handler = fun.push_insn(block, Insn::LoadField { recv: lep, id: ID!(_ep_specval), offset: SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, return_type: types::RubyValue }); fun.push_insn(block, Insn::GuardBitEquals { val: block_handler, expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), reason: SideExitReason::UnhandledBlockArg, - state + state, + recompile: None, }); } @@ -4360,12 +4392,13 @@ impl Function { }) } - fn guard_shape(&mut self, block: BlockId, val: InsnId, expected: ShapeId, state: InsnId) -> InsnId { + fn guard_shape(&mut self, block: BlockId, val: InsnId, expected: ShapeId, state: InsnId, recompile: Option) -> InsnId { self.push_insn(block, Insn::GuardBitEquals { val, expected: Const::CShape(expected), reason: SideExitReason::GuardShape(expected), - state + state, + recompile, }) } @@ -4501,9 +4534,16 @@ impl Function { self.count(block, Counter::getivar_fallback_too_complex); self.push_insn_id(block, insn_id); continue; } + if self.policy.no_side_exits { + // On the final version, skip GetIvar shape specialization. + // iseq_to_hir already generates polymorphic branches with a + // GetIvar C call fallback for getinstancevariable, so we don't + // need to wrap it again here. 
+ self.push_insn_id(block, insn_id); continue; + } let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state); let shape = self.load_shape(block, self_val); - self.guard_shape(block, shape, recv_type.shape(), state); + self.guard_shape(block, shape, recv_type.shape(), state, Some(-1)); let replacement = self.load_ivar(block, self_val, recv_type, id, state); self.make_equal_to(insn_id, replacement); } @@ -4532,7 +4572,7 @@ impl Function { } let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state); let shape = self.load_shape(block, self_val); - self.guard_shape(block, shape, recv_type.shape(), state); + self.guard_shape(block, shape, recv_type.shape(), state, None); let mut ivar_index: u16 = 0; let replacement = if unsafe { rb_shape_get_iv_index(recv_type.shape().0, id, &mut ivar_index) } { self.push_insn(block, Insn::Const { val: Const::Value(pushval) }) @@ -4604,7 +4644,7 @@ impl Function { } let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state); let shape = self.load_shape(block, self_val); - self.guard_shape(block, shape, recv_type.shape(), state); + self.guard_shape(block, shape, recv_type.shape(), state, None); // Current shape contains this ivar let (ivar_storage, offset) = if recv_type.flags().is_embedded() { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h @@ -7552,7 +7592,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { match profiled_block_type { Some(ty) if ty.nil_p() => { - fun.push_insn(unmodified_block, Insn::GuardBitEquals { val: block_handler, expected: Const::CInt64(VM_BLOCK_HANDLER_NONE.into()), reason: SideExitReason::BlockParamProxyNotNil, state: exit_id }); + fun.push_insn(unmodified_block, Insn::GuardBitEquals { val: block_handler, expected: Const::CInt64(VM_BLOCK_HANDLER_NONE.into()), reason: SideExitReason::BlockParamProxyNotNil, state: exit_id, recompile: None }); let nil_val = fun.push_insn(unmodified_block, Insn::Const { val: Const::Value(Qnil) }); let 
mut unmodified_args = vec![nil_val]; if let Some(local) = original_local { unmodified_args.push(local); } diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index afdafe74e7ef6e..7feda691dad9e5 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -7317,6 +7317,65 @@ mod hir_opt_tests { "); } + #[test] + fn test_getivar_shape_guard_recompile() { + // Call with one shape to compile, then call with a different shape to + // trigger shape guard exits and recompilation. On the recompiled version, + // GetIvar stays as a C call because iseq_to_hir handles polymorphic + // branching at parse time for getinstancevariable. + eval(" + class C + def initialize(extra = false) + @bar = 0 if extra # changes the shape + @foo = 42 + end + def foo = @foo + end + + c = C.new + c.foo # profile + c.foo # compile (version 1 with shape guard) + d = C.new(true) # same class, different shape + 100.times { d.foo } # trigger shape guard exits -> recompile + 100.times { c.foo } # run recompiled version (version 2) + "); + // After recompilation, iseq_to_hir generates polymorphic branches at + // parse time using the exit-profiled shapes: two optimized LoadField + // fast paths plus a GetIvar C call fallback. 
+ assert_snapshot!(hir_string_proc("C.new.method(:foo)"), @" + fn foo@:7: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + PatchPoint SingleRactorMode + v11:HeapBasicObject = GuardType v6, HeapBasicObject + v12:CShape = LoadField v11, :_shape_id@0x1000 + v14:CShape[0x1001] = Const CShape(0x1001) + v15:CBool = IsBitEqual v12, v14 + IfTrue v15, bb5() + v19:CShape[0x1002] = Const CShape(0x1002) + v20:CBool = IsBitEqual v12, v19 + IfTrue v20, bb6() + v24:BasicObject = GetIvar v11, :@foo + Jump bb4(v24) + bb5(): + v17:BasicObject = LoadField v11, :@foo@0x1003 + Jump bb4(v17) + bb6(): + v22:BasicObject = LoadField v11, :@foo@0x1004 + Jump bb4(v22) + bb4(v13:BasicObject): + CheckInterrupts + Return v13 + "); + } + #[test] fn test_optimize_getivar_on_module_embedded() { eval(" diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 1ea27dcb45cee3..ae9f0897010b9d 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -452,6 +452,27 @@ impl IseqProfile { entry.profiles_remaining == 0 } + /// Profile self for a shape guard exit at runtime. + /// This may be called on an instruction that was already profiled by YARV, + /// so we reset the counter to re-profile with the new shapes seen at runtime. + /// Returns true if enough profiles have been gathered and the ISEQ should be recompiled. + pub fn profile_self_at(&mut self, iseq: IseqPtr, insn_idx: usize, self_val: VALUE) -> bool { + let entry = self.entry_mut(insn_idx); + // Reset profiling if the previous round already finished (stale YARV profiles). + // This ensures we collect num_profiles samples of the new shapes before recompiling. 
+ if entry.profiles_remaining == 0 { + entry.profiles_remaining = get_option!(num_profiles); + } + if entry.opnd_types.is_empty() { + entry.opnd_types.resize(1, TypeDistribution::new()); + } + let ty = ProfiledType::new(self_val); + VALUE::from(iseq).write_barrier(ty.class()); + entry.opnd_types[0].observe(ty); + entry.profiles_remaining = entry.profiles_remaining.saturating_sub(1); + entry.profiles_remaining == 0 + } + /// Get profiled operand types for a given instruction index pub fn get_operand_types(&self, insn_idx: usize) -> Option<&[TypeDistribution]> { self.entry(insn_idx).map(|e| e.opnd_types.as_slice()).filter(|s| !s.is_empty()) From 6ab9b22553ac802819aa1643a9ac9575e75d1286 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 3 Apr 2026 14:55:23 -0700 Subject: [PATCH 5/5] ZJIT: Add IseqVersion::is_invalidated() method Address review feedback: use a method instead of directly comparing .status == IseqStatus::Invalidated. --- zjit/src/codegen.rs | 4 ++-- zjit/src/hir.rs | 4 ++-- zjit/src/payload.rs | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 089df5eb9833e5..3e715e727d7337 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -222,7 +222,7 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool) /// GC) so that all compile/recompile tuning decisions live in one place. pub fn invalidate_iseq_version(cb: &mut CodeBlock, iseq: IseqPtr, version: &mut IseqVersionRef) { let payload = get_or_create_iseq_payload(iseq); - if unsafe { version.as_ref() }.status != IseqStatus::Invalidated + if !unsafe { version.as_ref() }.is_invalidated() && payload.versions.len() < max_iseq_versions() { unsafe { version.as_mut() }.status = IseqStatus::Invalidated; @@ -3055,7 +3055,7 @@ c_callable! 
{ let iseq: IseqPtr = iseq_raw.as_iseq(); let payload = get_or_create_iseq_payload(iseq); let already_done = payload.versions.last() - .map_or(false, |v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated) + .map_or(false, |v| unsafe { v.as_ref() }.is_invalidated()) || payload.versions.len() >= max_iseq_versions(); if already_done { return; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index ec1db4e7977e3e..1f0d0764c8e452 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -7,7 +7,7 @@ #![allow(clippy::match_like_matches_macro)] use crate::{ backend::lir::C_ARG_OPNDS, - cast::IntoUsize, codegen::{local_idx_to_ep_offset, max_iseq_versions}, cruby::*, invariants::{self}, payload::{get_or_create_iseq_payload, IseqPayload, IseqStatus}, options::{debug, get_option, DumpHIR}, state::ZJITState, json::Json, + cast::IntoUsize, codegen::{local_idx_to_ep_offset, max_iseq_versions}, cruby::*, invariants::{self}, payload::{get_or_create_iseq_payload, IseqPayload}, options::{debug, get_option, DumpHIR}, state::ZJITState, json::Json, state, }; use std::{ @@ -2446,7 +2446,7 @@ impl CompilePolicy { } else { let payload = get_or_create_iseq_payload(iseq); payload.versions.iter().any( - |v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated + |v| unsafe { v.as_ref() }.is_invalidated() ) && payload.versions.len() + 1 >= max_iseq_versions() }; Self { no_side_exits } diff --git a/zjit/src/payload.rs b/zjit/src/payload.rs index 567c6e9eea10f8..010972bdae19a9 100644 --- a/zjit/src/payload.rs +++ b/zjit/src/payload.rs @@ -51,6 +51,11 @@ pub struct IseqVersion { pub type IseqVersionRef = NonNull; impl IseqVersion { + /// Check if this version was invalidated + pub fn is_invalidated(&self) -> bool { + self.status == IseqStatus::Invalidated + } + /// Allocate a new IseqVersion to be compiled pub fn new(iseq: IseqPtr) -> IseqVersionRef { let version = Self {