diff --git a/src/simlin-engine/examples/clearn_profile.rs b/src/simlin-engine/examples/clearn_profile.rs index 7cc9a774b..7fc0f2297 100644 --- a/src/simlin-engine/examples/clearn_profile.rs +++ b/src/simlin-engine/examples/clearn_profile.rs @@ -226,6 +226,17 @@ fn main() { println!(" {name:<22} {count:>9} {pct:>5.1}%"); } + let fused_total: usize = prof.fused_histogram.values().sum(); + let mut fhist: Vec<_> = prof.fused_histogram.iter().collect(); + fhist.sort_by(|a, b| b.1.cmp(a.1)); + println!(" post-fusion flow+stock stream: {fused_total} opcodes; fused-binop counts:"); + for (name, count) in fhist + .iter() + .filter(|(n, _)| n.starts_with("Bin") || n.starts_with("Assign")) + { + println!(" {name:<22} {count:>9}"); + } + let mut vm = phase("Vm::new", || Vm::new(compiled.clone()).unwrap()); println!(" variables (offsets): {}", vm.names_as_strs().len()); diff --git a/src/simlin-engine/src/bytecode.rs b/src/simlin-engine/src/bytecode.rs index 71d157041..59f5ffe6a 100644 --- a/src/simlin-engine/src/bytecode.rs +++ b/src/simlin-engine/src/bytecode.rs @@ -285,6 +285,20 @@ impl RuntimeView { let mut flat = self.offset as usize; + // Dense fast-path: the overwhelmingly common case is no sparse mappings. + // This function is a per-element hot spot (called from the vector-op / + // reducer dispatch sites). When `sparse` is empty the sparse_lookup + // build below is pure overhead -- every entry would be `None`, so each + // `actual_idx` equals `idx` -- making this branch numerically identical + // to the general path while skipping the SmallVec allocation/scan and + // the per-index Option check. 
+ if self.sparse.is_empty() { + for (i, &idx) in indices.iter().enumerate() { + flat += idx as usize * self.strides[i] as usize; + } + return flat; + } + // Build a quick lookup for sparse dimensions let sparse_lookup: SmallVec<[Option<&[u16]>; 4]> = (0..self.dims.len()) .map(|i| { @@ -683,6 +697,244 @@ pub(crate) enum Opcode { op: Op2, }, + // === 3-ADDRESS BINARY OPS WITH GLOBAL OPERANDS (R2 extension) === + // Mirror the `Bin*` pushing forms above, but for the leaf operands that are + // GLOBALS (TIME / DT / INITIAL_TIME / FINAL_TIME, loaded by `LoadGlobalVar`). + // The original `Bin*` forms missed these because no binop fused a global + // operand, so a `LoadGlobalVar` operand stayed an unfused load. + // + // CRITICAL: a `_global` field indexes `curr[g]` directly (an absolute global + // slot, NO `module_off` -- exactly like `LoadGlobalVar`), while a plain + // var field indexes `curr[module_off + v]` (module-relative, like `LoadVar`). + // Inside a submodule (`module_off > 0`) those are different slots; conflating + // them is a silent miscompile. The operand order is `l op r` matching the + // original load order, load-bearing for the non-commutative Sub/Div. + /// Push `curr[l_global] op curr[module_off + r]`. + BinGlobalVar { + l_global: VariableOffset, + r: VariableOffset, + op: Op2, + }, + /// Push `curr[module_off + l] op curr[r_global]`. + BinVarGlobal { + l: VariableOffset, + r_global: VariableOffset, + op: Op2, + }, + /// Push `curr[l_global] op literals[r]`. + BinGlobalConst { + l_global: VariableOffset, + r: LiteralId, + op: Op2, + }, + /// Push `literals[l] op curr[r_global]`. + BinConstGlobal { + l: LiteralId, + r_global: VariableOffset, + op: Op2, + }, + /// Push `curr[l_global] op curr[r_global]`. + BinGlobalGlobal { + l_global: VariableOffset, + r_global: VariableOffset, + op: Op2, + }, + /// Pop `lhs`; push `lhs op curr[r_global]`. 
+ BinStackGlobal { + r_global: VariableOffset, + op: Op2, + }, + + // === 3-ADDRESS BINARY OP WITH TWO CONSTANT OPERANDS (R2 extension) === + // The greedy 3-window missed `LoadConstant; LoadConstant; Op2` (both operands + // are separate literals) because no `Bin*` form took two constant leaves. + // This is NOT compile-time constant folding of the result: the two operands + // are distinct interned literals, so we fuse the two loads + the op into one + // dispatch that still computes `literals[l] op literals[r]` at run time. + /// Push `literals[l] op literals[r]`. + BinConstConst { + l: LiteralId, + r: LiteralId, + op: Op2, + }, + + // === 3-ADDRESS FUSED LEAF ASSIGNMENTS (R2 extension) === + // A leaf assignment `dst = a op b` is, post-`peephole_optimize`, + // `LoadX a; LoadX b; BinOpAssign{Curr|Next}(op, dst)` (3 dispatches). These + // fold all three into one register-style op that reads its operands straight + // from `curr[]`/`literals` and writes the result straight to `curr[]`/`next[]` + // -- no stack push or pop at all. Like the 2-operand pushing forms above they + // are created only by the late `fuse_three_address` pass on final concrete + // bytecode and never enter the symbolic/incremental layer. + // + // The operator is encoded in the variant tag (one variant per {Add,Sub,Mul, + // Div}) rather than an `Op2` payload field. That keeps the payload at 3xu16 = + // 6 bytes so `size_of::<Opcode>()` stays at 8; a shared `Op2` field would make + // the payload 7 bytes and blow the budget. The four operators cover ~100% of + // measured leaf-assign candidates; any other operator is left in its existing + // `BinOpAssign{Curr|Next}` form. Encoding the operator in the tag also removes + // the per-dispatch `eval_op2` operator branch: each arm is straight-line f64 + // arithmetic. The operand order is `l op r` matching the original load order, + // which is load-bearing for the non-commutative Sub and Div. 
+ + // -- VarVar: `c[dst] = c[l] OP c[r]` (Curr) / `n[dst] = c[l] OP c[r]` (Next) -- + AssignAddVarVarCurr { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignSubVarVarCurr { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignMulVarVarCurr { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignDivVarVarCurr { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignAddVarVarNext { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignSubVarVarNext { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignMulVarVarNext { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + AssignDivVarVarNext { + l: VariableOffset, + r: VariableOffset, + dst: VariableOffset, + }, + + // -- VarConst: `c[dst] = c[l] OP literals[r]` (l is a var, r is a literal) -- + AssignAddVarConstCurr { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignSubVarConstCurr { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignMulVarConstCurr { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignDivVarConstCurr { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignAddVarConstNext { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignSubVarConstNext { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignMulVarConstNext { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + AssignDivVarConstNext { + l: VariableOffset, + r: LiteralId, + dst: VariableOffset, + }, + + // -- ConstVar: `c[dst] = literals[l] OP c[r]` (l is a literal, r is a var) -- + AssignAddConstVarCurr { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignSubConstVarCurr { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignMulConstVarCurr { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, 
+ }, + AssignDivConstVarCurr { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignAddConstVarNext { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignSubConstVarNext { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignMulConstVarNext { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + AssignDivConstVarNext { + l: LiteralId, + r: VariableOffset, + dst: VariableOffset, + }, + + // === 2-ADDRESS STACK-LEAF FUSED ASSIGNMENTS (R2 extension) === + // `lhs` is already on the arithmetic stack (a nested subexpression result); + // the rhs is a leaf load. Post-peephole this is `LoadX b; BinOpAssign(op, dst)` + // (2 dispatches): pop the lhs, combine with the leaf rhs, store. Folds 2->1. + // Here the operator stays in the payload (the `{dst, b}` + `op` form is 5 + // bytes, still within budget) so all operators -- not just {Add,Sub,Mul,Div} + // -- are handled by one variant per (Var/Const, Curr/Next) combo. + /// Pop `lhs`; `curr[module_off + dst] = lhs op curr[module_off + b]`. + AssignStackVarCurr { + dst: VariableOffset, + b: VariableOffset, + op: Op2, + }, + /// Pop `lhs`; `next[module_off + dst] = lhs op curr[module_off + b]`. + AssignStackVarNext { + dst: VariableOffset, + b: VariableOffset, + op: Op2, + }, + /// Pop `lhs`; `curr[module_off + dst] = lhs op literals[b]`. + AssignStackConstCurr { + dst: VariableOffset, + b: LiteralId, + op: Op2, + }, + /// Pop `lhs`; `next[module_off + dst] = lhs op literals[b]`. + AssignStackConstNext { + dst: VariableOffset, + b: LiteralId, + op: Op2, + }, + // ========================================================================= // ARRAY SUPPORT (new) // ========================================================================= @@ -1025,6 +1277,52 @@ impl Opcode { } Opcode::BinStackVar { .. } | Opcode::BinStackConst { .. 
} => (1, 1), + // Global-operand pushing forms mirror the var/const forms above: the + // two-leaf forms read both operands from curr/literals and push + // (0 pops, 1 push); BinStackGlobal pops the lhs and pushes (1, 1). + // BinConstConst reads both literals and pushes (0, 1). + Opcode::BinGlobalVar { .. } + | Opcode::BinVarGlobal { .. } + | Opcode::BinGlobalConst { .. } + | Opcode::BinConstGlobal { .. } + | Opcode::BinGlobalGlobal { .. } + | Opcode::BinConstConst { .. } => (0, 1), + Opcode::BinStackGlobal { .. } => (1, 1), + + // 3-address fused leaf assignments read operands from curr/literals + // and write straight to curr/next: no arithmetic-stack traffic. + Opcode::AssignAddVarVarCurr { .. } + | Opcode::AssignSubVarVarCurr { .. } + | Opcode::AssignMulVarVarCurr { .. } + | Opcode::AssignDivVarVarCurr { .. } + | Opcode::AssignAddVarVarNext { .. } + | Opcode::AssignSubVarVarNext { .. } + | Opcode::AssignMulVarVarNext { .. } + | Opcode::AssignDivVarVarNext { .. } + | Opcode::AssignAddVarConstCurr { .. } + | Opcode::AssignSubVarConstCurr { .. } + | Opcode::AssignMulVarConstCurr { .. } + | Opcode::AssignDivVarConstCurr { .. } + | Opcode::AssignAddVarConstNext { .. } + | Opcode::AssignSubVarConstNext { .. } + | Opcode::AssignMulVarConstNext { .. } + | Opcode::AssignDivVarConstNext { .. } + | Opcode::AssignAddConstVarCurr { .. } + | Opcode::AssignSubConstVarCurr { .. } + | Opcode::AssignMulConstVarCurr { .. } + | Opcode::AssignDivConstVarCurr { .. } + | Opcode::AssignAddConstVarNext { .. } + | Opcode::AssignSubConstVarNext { .. } + | Opcode::AssignMulConstVarNext { .. } + | Opcode::AssignDivConstVarNext { .. } => (0, 0), + + // Stack-leaf fused assignments pop the pre-existing lhs (1 pop) and + // write the combined result to curr/next (no push). + Opcode::AssignStackVarCurr { .. } + | Opcode::AssignStackVarNext { .. } + | Opcode::AssignStackConstCurr { .. } + | Opcode::AssignStackConstNext { .. 
} => (1, 0), + // View stack ops don't touch arithmetic stack Opcode::PushVarView { .. } | Opcode::PushTempView { .. } @@ -1118,8 +1416,43 @@ impl Opcode { Opcode::BinConstVar { .. } => "BinConstVar", Opcode::BinStackVar { .. } => "BinStackVar", Opcode::BinStackConst { .. } => "BinStackConst", + Opcode::BinGlobalVar { .. } => "BinGlobalVar", + Opcode::BinVarGlobal { .. } => "BinVarGlobal", + Opcode::BinGlobalConst { .. } => "BinGlobalConst", + Opcode::BinConstGlobal { .. } => "BinConstGlobal", + Opcode::BinGlobalGlobal { .. } => "BinGlobalGlobal", + Opcode::BinStackGlobal { .. } => "BinStackGlobal", + Opcode::BinConstConst { .. } => "BinConstConst", Opcode::BinOpAssignCurr { .. } => "BinOpAssignCurr", Opcode::BinOpAssignNext { .. } => "BinOpAssignNext", + Opcode::AssignAddVarVarCurr { .. } => "AssignAddVarVarCurr", + Opcode::AssignSubVarVarCurr { .. } => "AssignSubVarVarCurr", + Opcode::AssignMulVarVarCurr { .. } => "AssignMulVarVarCurr", + Opcode::AssignDivVarVarCurr { .. } => "AssignDivVarVarCurr", + Opcode::AssignAddVarVarNext { .. } => "AssignAddVarVarNext", + Opcode::AssignSubVarVarNext { .. } => "AssignSubVarVarNext", + Opcode::AssignMulVarVarNext { .. } => "AssignMulVarVarNext", + Opcode::AssignDivVarVarNext { .. } => "AssignDivVarVarNext", + Opcode::AssignAddVarConstCurr { .. } => "AssignAddVarConstCurr", + Opcode::AssignSubVarConstCurr { .. } => "AssignSubVarConstCurr", + Opcode::AssignMulVarConstCurr { .. } => "AssignMulVarConstCurr", + Opcode::AssignDivVarConstCurr { .. } => "AssignDivVarConstCurr", + Opcode::AssignAddVarConstNext { .. } => "AssignAddVarConstNext", + Opcode::AssignSubVarConstNext { .. } => "AssignSubVarConstNext", + Opcode::AssignMulVarConstNext { .. } => "AssignMulVarConstNext", + Opcode::AssignDivVarConstNext { .. } => "AssignDivVarConstNext", + Opcode::AssignAddConstVarCurr { .. } => "AssignAddConstVarCurr", + Opcode::AssignSubConstVarCurr { .. } => "AssignSubConstVarCurr", + Opcode::AssignMulConstVarCurr { .. 
} => "AssignMulConstVarCurr", + Opcode::AssignDivConstVarCurr { .. } => "AssignDivConstVarCurr", + Opcode::AssignAddConstVarNext { .. } => "AssignAddConstVarNext", + Opcode::AssignSubConstVarNext { .. } => "AssignSubConstVarNext", + Opcode::AssignMulConstVarNext { .. } => "AssignMulConstVarNext", + Opcode::AssignDivConstVarNext { .. } => "AssignDivConstVarNext", + Opcode::AssignStackVarCurr { .. } => "AssignStackVarCurr", + Opcode::AssignStackVarNext { .. } => "AssignStackVarNext", + Opcode::AssignStackConstCurr { .. } => "AssignStackConstCurr", + Opcode::AssignStackConstNext { .. } => "AssignStackConstNext", Opcode::PushVarView { .. } => "PushVarView", Opcode::PushTempView { .. } => "PushTempView", Opcode::PushStaticView { .. } => "PushStaticView", @@ -1208,16 +1541,34 @@ pub struct StaticArrayView { } impl StaticArrayView { - /// Convert to a RuntimeView for use on the view stack + /// Convert to a RuntimeView for use on the view stack. + /// + /// This runs on every `PushStaticView` (~1M times on a C-LEARN run), so the + /// per-field copies are deliberately the cheapest correct construction. + /// + /// `SmallVec`'s `Clone` (with the `specialization` feature off, as here) + /// lowers to `self.as_slice().iter().cloned().collect()` -- an element-wise + /// `Extend`, NOT a memcpy -- even for `Copy` elements. For the three + /// `Copy`-element vectors we instead call `from_slice`, which copies the + /// inline buffer in one `ptr::copy_nonoverlapping`. `sparse`'s element type + /// (`RuntimeSparseMapping`) is not `Copy`, but it is empty for every dense + /// (non-star-range) view -- the overwhelmingly common case -- so we take a + /// free fresh empty `SmallVec` then and only fall back to a real clone for a + /// genuinely sparse view. 
pub fn to_runtime_view(&self) -> RuntimeView { + let sparse = if self.sparse.is_empty() { + SmallVec::new() + } else { + self.sparse.clone() + }; RuntimeView { base_off: self.base_off, is_temp: self.is_temp, - dims: self.dims.clone(), - strides: self.strides.clone(), + dims: SmallVec::from_slice(&self.dims), + strides: SmallVec::from_slice(&self.strides), offset: self.offset, - sparse: self.sparse.clone(), - dim_ids: self.dim_ids.clone(), + sparse, + dim_ids: SmallVec::from_slice(&self.dim_ids), is_valid: true, } } @@ -1377,45 +1728,6 @@ impl ByteCode { } max_depth } - - /// Estimate the opcode count after a 3-address fusion pass that folds leaf - /// operand loads into the binary op that consumes them (R2). Greedy, - /// post-peephole semantics: `LoadX; LoadY; Op2` fuses 3->1 (both operands - /// are leaf loads) and `LoadX; Op2` fuses 2->1 (one operand is a leaf load, - /// the other is already on the stack), where a leaf load is `LoadVar`, - /// `LoadGlobalVar`, or `LoadConstant`. Used only to size the win before - /// implementing the pass; the real pass must additionally fix up jumps. - pub(crate) fn estimate_fused_len(&self) -> usize { - fn is_leaf_load(op: &Opcode) -> bool { - matches!( - op, - Opcode::LoadVar { .. } | Opcode::LoadGlobalVar { .. } | Opcode::LoadConstant { .. } - ) - } - let code = &self.code; - let mut i = 0; - let mut emitted = 0; - while i < code.len() { - if i + 2 < code.len() - && is_leaf_load(&code[i]) - && is_leaf_load(&code[i + 1]) - && matches!(code[i + 2], Opcode::Op2 { .. }) - { - emitted += 1; - i += 3; - } else if i + 1 < code.len() - && is_leaf_load(&code[i]) - && matches!(code[i + 1], Opcode::Op2 { .. 
}) - { - emitted += 1; - i += 2; - } else { - emitted += 1; - i += 1; - } - } - emitted - } } #[cfg_attr(feature = "debug-derive", derive(Debug))] @@ -1565,9 +1877,22 @@ impl ByteCode { } /// Late 3-address fusion pass (R2): fold the leaf operand load(s) of a - /// binary op into the op itself, so `LoadX; LoadY; Op2` becomes one - /// `BinXY` (3->1) and `LoadX; Op2` (lhs already on the stack) becomes one - /// `BinStackX` (2->1). + /// binary op into the op itself. + /// + /// Two families of pattern, both longest-match-first within their window: + /// - Pushing subexpressions: `LoadX; LoadY; Op2` -> one `Bin*` (3->1) and + /// `LoadX; Op2` (lhs already on the stack) -> one `BinStack*` (2->1). + /// - Leaf assignments: post-`peephole_optimize` a leaf assign is `LoadX; + /// LoadY; BinOpAssign{Curr|Next}` (the peephole already folded the trailing + /// `Op2; AssignCurr` into `BinOpAssignCurr`). This pass folds the whole + /// thing into one register-style `Assign{Add|Sub|Mul|Div}{combo}{phase}` + /// (3->1) that reads operands from curr/literals and writes straight to + /// curr/next with no stack traffic. The 2-window analogue `LoadX; + /// BinOpAssign` (lhs on the stack) folds into `AssignStack{Var|Const}{phase}` + /// (2->1). The 3-operand forms exist only for {Add,Sub,Mul,Div} (the + /// operator is in the variant tag to keep the opcode within 8 bytes); + /// other operators keep their `BinOpAssign` form. The 2-operand stack-leaf + /// forms keep the operator in the payload so they cover every operator. /// /// MUST run only on FINAL concrete bytecode -- after `peephole_optimize` /// and, for the incremental path, after `resolve` -- because the fused @@ -1585,6 +1910,62 @@ impl ByteCode { return; } + // The four operators that have dedicated leaf-assign opcodes. Any other + // operator leaves the leaf assign in its `BinOpAssign{Curr|Next}` form. + // `is_next` selects the Curr vs Next family. 
`l`/`r` keep the + // original load order (load-bearing for the non-commutative Sub/Div). + fn fused_leaf_var_var(op: Op2, is_next: bool, l: u16, r: u16, dst: u16) -> Option<Opcode> { + Some(match (op, is_next) { + (Op2::Add, false) => Opcode::AssignAddVarVarCurr { l, r, dst }, + (Op2::Sub, false) => Opcode::AssignSubVarVarCurr { l, r, dst }, + (Op2::Mul, false) => Opcode::AssignMulVarVarCurr { l, r, dst }, + (Op2::Div, false) => Opcode::AssignDivVarVarCurr { l, r, dst }, + (Op2::Add, true) => Opcode::AssignAddVarVarNext { l, r, dst }, + (Op2::Sub, true) => Opcode::AssignSubVarVarNext { l, r, dst }, + (Op2::Mul, true) => Opcode::AssignMulVarVarNext { l, r, dst }, + (Op2::Div, true) => Opcode::AssignDivVarVarNext { l, r, dst }, + _ => return None, + }) + } + fn fused_leaf_var_const( + op: Op2, + is_next: bool, + l: u16, + r: u16, + dst: u16, + ) -> Option<Opcode> { + Some(match (op, is_next) { + (Op2::Add, false) => Opcode::AssignAddVarConstCurr { l, r, dst }, + (Op2::Sub, false) => Opcode::AssignSubVarConstCurr { l, r, dst }, + (Op2::Mul, false) => Opcode::AssignMulVarConstCurr { l, r, dst }, + (Op2::Div, false) => Opcode::AssignDivVarConstCurr { l, r, dst }, + (Op2::Add, true) => Opcode::AssignAddVarConstNext { l, r, dst }, + (Op2::Sub, true) => Opcode::AssignSubVarConstNext { l, r, dst }, + (Op2::Mul, true) => Opcode::AssignMulVarConstNext { l, r, dst }, + (Op2::Div, true) => Opcode::AssignDivVarConstNext { l, r, dst }, + _ => return None, + }) + } + fn fused_leaf_const_var( + op: Op2, + is_next: bool, + l: u16, + r: u16, + dst: u16, + ) -> Option<Opcode> { + Some(match (op, is_next) { + (Op2::Add, false) => Opcode::AssignAddConstVarCurr { l, r, dst }, + (Op2::Sub, false) => Opcode::AssignSubConstVarCurr { l, r, dst }, + (Op2::Mul, false) => Opcode::AssignMulConstVarCurr { l, r, dst }, + (Op2::Div, false) => Opcode::AssignDivConstVarCurr { l, r, dst }, + (Op2::Add, true) => Opcode::AssignAddConstVarNext { l, r, dst }, + (Op2::Sub, true) => Opcode::AssignSubConstVarNext { l, r, dst }, + 
(Op2::Mul, true) => Opcode::AssignMulConstVarNext { l, r, dst }, + (Op2::Div, true) => Opcode::AssignDivConstVarNext { l, r, dst }, + _ => return None, + }) + } + // 1. Build set of PCs that are jump targets. let mut jump_targets = vec![false; self.code.len()]; for (pc, op) in self.code.iter().enumerate() { @@ -1607,41 +1988,83 @@ impl ByteCode { while i < self.code.len() { let new_pc = optimized.len(); - // 3-window: [leaf load, leaf load, Op2]. Both absorbed instructions - // (i+1, i+2) must not be jump targets. - let three = i + 2 < self.code.len() - && !jump_targets[i + 1] - && !jump_targets[i + 2] - && matches!(self.code[i + 2], Opcode::Op2 { .. }); + // 3-window: [leaf load, leaf load, combiner] where the combiner is + // either an `Op2` (a pushing subexpression) or a `BinOpAssign{Curr| + // Next}` (a leaf assignment, post-peephole). Both absorbed + // instructions (i+1, i+2) must not be jump targets. + // + // The leaf-assign forms are tried first: a `BinOpAssign` third op + // means the whole `dst = a op b` collapses into one register-style + // op (3->1) rather than `Bin*` (3->1 pushing) + a separate store. + // Only {Add,Sub,Mul,Div} have dedicated leaf-assign opcodes; any + // other operator falls through and keeps the existing form. 
+ let three = i + 2 < self.code.len() && !jump_targets[i + 1] && !jump_targets[i + 2]; let fused3 = if three { - match (&self.code[i], &self.code[i + 1], &self.code[i + 2]) { - ( - Opcode::LoadVar { off: l }, - Opcode::LoadVar { off: r }, - Opcode::Op2 { op }, - ) => Some(Opcode::BinVarVar { - l: *l, - r: *r, - op: *op, - }), - ( - Opcode::LoadVar { off: l }, - Opcode::LoadConstant { id: r }, - Opcode::Op2 { op }, - ) => Some(Opcode::BinVarConst { - l: *l, - r: *r, - op: *op, - }), - ( - Opcode::LoadConstant { id: l }, - Opcode::LoadVar { off: r }, - Opcode::Op2 { op }, - ) => Some(Opcode::BinConstVar { - l: *l, - r: *r, - op: *op, - }), + // Decode the combiner once into two mutually-exclusive options: + // `assign3 = (op, dst, is_next)` for a leaf-assign, or + // `push3 = op` for a pushing Op2. + let (assign3, push3) = match &self.code[i + 2] { + Opcode::BinOpAssignCurr { op, off } => (Some((*op, *off, false)), None), + Opcode::BinOpAssignNext { op, off } => (Some((*op, *off, true)), None), + Opcode::Op2 { op } => (None, Some(*op)), + _ => (None, None), + }; + match (&self.code[i], &self.code[i + 1]) { + // Leaf assignment `dst = a op b` -> one fused op (3->1). + (Opcode::LoadVar { off: l }, Opcode::LoadVar { off: r }) => assign3 + .and_then(|(op, dst, n)| fused_leaf_var_var(op, n, *l, *r, dst)) + .or_else(|| push3.map(|op| Opcode::BinVarVar { l: *l, r: *r, op })), + (Opcode::LoadVar { off: l }, Opcode::LoadConstant { id: r }) => assign3 + .and_then(|(op, dst, n)| fused_leaf_var_const(op, n, *l, *r, dst)) + .or_else(|| push3.map(|op| Opcode::BinVarConst { l: *l, r: *r, op })), + (Opcode::LoadConstant { id: l }, Opcode::LoadVar { off: r }) => assign3 + .and_then(|(op, dst, n)| fused_leaf_const_var(op, n, *l, *r, dst)) + .or_else(|| push3.map(|op| Opcode::BinConstVar { l: *l, r: *r, op })), + // Two constant leaves: no leaf-assign form, so this only fuses + // a pushing `Op2`. 
Computes `literals[l] op literals[r]` at run + // time (NOT compile-time folding -- the operands are two + // distinct interned literals). + (Opcode::LoadConstant { id: l }, Opcode::LoadConstant { id: r }) => { + push3.map(|op| Opcode::BinConstConst { l: *l, r: *r, op }) + } + // Global-operand leaf pairs. A global has no dedicated + // leaf-assign opcode, so these fuse only a pushing `Op2`; a + // `BinOpAssign` combiner (push3 == None) falls through to the + // 2-window, which folds the rhs+store and leaves the global + // load as a standalone push. `l_global`/`r_global` index + // `curr[g]` (absolute), the var operand `curr[module_off + v]`. + (Opcode::LoadGlobalVar { off: l }, Opcode::LoadVar { off: r }) => { + push3.map(|op| Opcode::BinGlobalVar { + l_global: *l, + r: *r, + op, + }) + } + (Opcode::LoadVar { off: l }, Opcode::LoadGlobalVar { off: r }) => { + push3.map(|op| Opcode::BinVarGlobal { + l: *l, + r_global: *r, + op, + }) + } + (Opcode::LoadGlobalVar { off: l }, Opcode::LoadConstant { id: r }) => push3 + .map(|op| Opcode::BinGlobalConst { + l_global: *l, + r: *r, + op, + }), + (Opcode::LoadConstant { id: l }, Opcode::LoadGlobalVar { off: r }) => push3 + .map(|op| Opcode::BinConstGlobal { + l: *l, + r_global: *r, + op, + }), + (Opcode::LoadGlobalVar { off: l }, Opcode::LoadGlobalVar { off: r }) => push3 + .map(|op| Opcode::BinGlobalGlobal { + l_global: *l, + r_global: *r, + op, + }), _ => None, } } else { @@ -1656,17 +2079,45 @@ impl ByteCode { continue; } - // 2-window: [leaf load, Op2] with the lhs already on the stack. - let two = i + 1 < self.code.len() - && !jump_targets[i + 1] - && matches!(self.code[i + 1], Opcode::Op2 { .. }); + // 2-window: [leaf load, combiner] with the lhs already on the + // stack. The combiner is either `Op2` (pushing) or `BinOpAssign` + // (the stack-leaf assignment `dst = lhs op b`). The stack-leaf form + // keeps the operator in its payload, so every operator is handled. 
+ let two = i + 1 < self.code.len() && !jump_targets[i + 1]; let fused2 = if two { - match (&self.code[i], &self.code[i + 1]) { - (Opcode::LoadVar { off: r }, Opcode::Op2 { op }) => { - Some(Opcode::BinStackVar { r: *r, op: *op }) - } - (Opcode::LoadConstant { id: r }, Opcode::Op2 { op }) => { - Some(Opcode::BinStackConst { r: *r, op: *op }) + // As above: a stack-leaf assign (operator kept in payload) or a + // pushing Op2. The stack-leaf form handles every operator. + let (assign2, push2) = match &self.code[i + 1] { + Opcode::BinOpAssignCurr { op, off } => (Some((*op, *off, false)), None), + Opcode::BinOpAssignNext { op, off } => (Some((*op, *off, true)), None), + Opcode::Op2 { op } => (None, Some(*op)), + _ => (None, None), + }; + match &self.code[i] { + Opcode::LoadVar { off: b } => assign2 + .map(|(op, dst, n)| { + if n { + Opcode::AssignStackVarNext { dst, b: *b, op } + } else { + Opcode::AssignStackVarCurr { dst, b: *b, op } + } + }) + .or_else(|| push2.map(|op| Opcode::BinStackVar { r: *b, op })), + Opcode::LoadConstant { id: b } => assign2 + .map(|(op, dst, n)| { + if n { + Opcode::AssignStackConstNext { dst, b: *b, op } + } else { + Opcode::AssignStackConstCurr { dst, b: *b, op } + } + }) + .or_else(|| push2.map(|op| Opcode::BinStackConst { r: *b, op })), + // `(lhs on stack) op global`. No global stack-leaf-assign + // opcode exists (the global ops are pushing-only), so a + // `BinOpAssign` combiner (push2 == None) is left unfused: the + // standalone `LoadGlobalVar` then the `BinOpAssign` pops both. + Opcode::LoadGlobalVar { off: b } => { + push2.map(|op| Opcode::BinStackGlobal { r_global: *b, op }) } _ => None, } @@ -3528,6 +3979,264 @@ mod tests { )); } + // === 3-address fused leaf assignments (R2 extension) === + // + // Input here is *post-peephole*: the trailing `Op2; AssignCurr` has already + // been folded to `BinOpAssignCurr` (and `...Next`). The leaf-assign fusion + // collapses the whole `dst = a op b` to one register-style op. 
+ + #[test] + fn test_fuse_leaf_assign_var_var_curr() { + // `dst = a - b`: LoadVar; LoadVar; BinOpAssignCurr(Sub) -> one op. + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 3 }, + Opcode::LoadVar { off: 4 }, + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 9, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!( + matches!( + bc.code[0], + Opcode::AssignSubVarVarCurr { l: 3, r: 4, dst: 9 } + ), + "got {:?}", + bc.code[0].name() + ); + } + + #[test] + fn test_fuse_leaf_assign_var_var_next_div_order() { + // `dst_next = a / b`: division is non-commutative, so a swapped encoding + // is a loud failure. Verify l=a (numerator), r=b (denominator). + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 1 }, + Opcode::LoadVar { off: 2 }, + Opcode::BinOpAssignNext { + op: Op2::Div, + off: 5, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::AssignDivVarVarNext { l: 1, r: 2, dst: 5 } + )); + } + + #[test] + fn test_fuse_leaf_assign_var_const_order() { + // `dst = a - 5`: var is lhs, const is rhs. + let mut bc = ByteCode { + literals: vec![5.0], + code: vec![ + Opcode::LoadVar { off: 7 }, + Opcode::LoadConstant { id: 0 }, + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 2, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::AssignSubVarConstCurr { l: 7, r: 0, dst: 2 } + )); + } + + #[test] + fn test_fuse_leaf_assign_const_var_order() { + // `dst = 10 / a`: const is lhs (numerator literal), var is rhs. 
+ let mut bc = ByteCode { + literals: vec![10.0], + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::LoadVar { off: 7 }, + Opcode::BinOpAssignCurr { + op: Op2::Div, + off: 2, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::AssignDivConstVarCurr { l: 0, r: 7, dst: 2 } + )); + } + + #[test] + fn test_fuse_leaf_assign_non_specialized_op_uses_stack_leaf() { + // `dst = a > b`: Gt has no dedicated 3-operand leaf-assign opcode, so the + // 3-window can't collapse it to a single op. But the operator-agnostic + // 2-window still folds the second load: `LoadVar a` stands alone, then + // `LoadVar b; BinOpAssignCurr(Gt)` becomes `AssignStackVarCurr{Gt}` with + // the lhs (a) taken from the stack. Net 3->2 (still a dispatch saved). + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::BinOpAssignCurr { + op: Op2::Gt, + off: 2, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 2); + assert!(matches!(bc.code[0], Opcode::LoadVar { off: 0 })); + assert!(matches!( + bc.code[1], + Opcode::AssignStackVarCurr { + dst: 2, + b: 1, + op: Op2::Gt + } + )); + } + + #[test] + fn test_fuse_stack_leaf_assign_order() { + // `dst = (a - b) - c`: the inner `a - b` is a pushing subexpression (`Op2`) + // and the outer subtraction is the assignment, so the post-peephole stream + // is LoadVar a; LoadVar b; Op2(Sub); LoadVar c; BinOpAssignCurr(Sub). The + // leading triple ends in `Op2`, so it becomes BinVarVar; the remaining + // [LoadVar c; BinOpAssignCurr(Sub)] is the stack-leaf 2-window and becomes + // AssignStackVarCurr with lhs = (a-b) on the stack and b = c. Sub is + // non-commutative: verify lhs (stack) - rhs (c), not c - (a-b). 
+ let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, // a + Opcode::LoadVar { off: 1 }, // b + Opcode::Op2 { op: Op2::Sub }, + Opcode::LoadVar { off: 2 }, // c + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 9, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 2); + assert!(matches!( + bc.code[0], + Opcode::BinVarVar { + l: 0, + r: 1, + op: Op2::Sub + } + )); + assert!(matches!( + bc.code[1], + Opcode::AssignStackVarCurr { + dst: 9, + b: 2, + op: Op2::Sub + } + )); + } + + #[test] + fn test_fuse_stack_leaf_assign_const_next() { + // `dst_next = (a - b) / 4`: BinVarVar then AssignStackConstNext. + let mut bc = ByteCode { + literals: vec![4.0], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Sub }, + Opcode::LoadConstant { id: 0 }, + Opcode::BinOpAssignNext { + op: Op2::Div, + off: 9, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 2); + assert!(matches!(bc.code[0], Opcode::BinVarVar { .. })); + assert!(matches!( + bc.code[1], + Opcode::AssignStackConstNext { + dst: 9, + b: 0, + op: Op2::Div + } + )); + } + + #[test] + fn test_fuse_leaf_assign_preserves_max_stack_depth() { + // Post-peephole leaf assigns must not change the (zero) net stack effect: + // each `LoadX; LoadX; BinOpAssign` is +2 then -2. After fusion the single + // op is (0,0). Build a sequence of leaf assigns and a nested one. 
+ let mut bc = ByteCode { + literals: vec![2.0], + code: vec![ + // x = a - b + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 4, + }, + // y = (c - d) * 2 (peak depth 2 mid-expression) + Opcode::LoadVar { off: 2 }, + Opcode::LoadVar { off: 3 }, + Opcode::Op2 { op: Op2::Sub }, + Opcode::LoadConstant { id: 0 }, + Opcode::BinOpAssignCurr { + op: Op2::Mul, + off: 5, + }, + ], + }; + let before = bc.max_stack_depth(); + bc.fuse_three_address(); + // Fusion folds loads into ops, so depth can only stay equal or shrink. + assert!(bc.max_stack_depth() <= before); + // And the leaf-assign collapsed to a (0,0) op, the stack-leaf assign to + // (1,0): the whole stream's peak is now 1 (the BinVarVar push). + assert_eq!(bc.max_stack_depth(), 1); + } + + #[test] + fn test_fuse_leaf_assign_blocked_by_jump_target() { + // A jump targets the BinOpAssignCurr (the instruction the triple would + // absorb). Fusing would make the jump land mid-fusion, so leave it alone. + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 2, + }, // <- jump target + Opcode::NextIterOrJump { jump_back: -1 }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 4); + assert!(matches!(bc.code[2], Opcode::BinOpAssignCurr { .. })); + } + #[test] fn test_fuse_noop_without_op2_and_empty() { // Nothing to fold into (no Op2): unchanged. @@ -3616,6 +4325,275 @@ mod tests { Opcode::NextIterOrJump { jump_back: -1 } )); } + + // === 3-address fusion with GLOBAL operands and two-constant operands === + // + // Globals (TIME/DT/...) load via `LoadGlobalVar`; the fusion now folds them + // in operand position. Sub and Div are exercised because they are + // non-commutative, so a swapped operand encoding is a loud failure rather + // than a silent miscompile. 
+ + #[test] + fn test_fuse_global_var_pushing() { + // `(g - v)` as a pushing subexpression (third op is Op2). The global is + // the lhs leaf, the var the rhs leaf. + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadGlobalVar { off: 0 }, + Opcode::LoadVar { off: 7 }, + Opcode::Op2 { op: Op2::Sub }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!( + matches!( + bc.code[0], + Opcode::BinGlobalVar { + l_global: 0, + r: 7, + op: Op2::Sub + } + ), + "got {}", + bc.code[0].name() + ); + } + + #[test] + fn test_fuse_var_global_div_order() { + // `v / g`: var is the lhs (numerator), global the rhs (denominator). + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 4 }, + Opcode::LoadGlobalVar { off: 1 }, + Opcode::Op2 { op: Op2::Div }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinVarGlobal { + l: 4, + r_global: 1, + op: Op2::Div + } + )); + } + + #[test] + fn test_fuse_global_const_sub_order() { + // `g - 5`: global lhs, const rhs. + let mut bc = ByteCode { + literals: vec![5.0], + code: vec![ + Opcode::LoadGlobalVar { off: 0 }, + Opcode::LoadConstant { id: 0 }, + Opcode::Op2 { op: Op2::Sub }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinGlobalConst { + l_global: 0, + r: 0, + op: Op2::Sub + } + )); + } + + #[test] + fn test_fuse_const_global_div_order() { + // `10 / g`: const lhs (numerator literal), global rhs (denominator). 
+ let mut bc = ByteCode { + literals: vec![10.0], + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::LoadGlobalVar { off: 1 }, + Opcode::Op2 { op: Op2::Div }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinConstGlobal { + l: 0, + r_global: 1, + op: Op2::Div + } + )); + } + + #[test] + fn test_fuse_global_global_sub_order() { + // `g0 - g1`: both leaves are globals. Distinct offsets verify l/r order. + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadGlobalVar { off: 0 }, + Opcode::LoadGlobalVar { off: 1 }, + Opcode::Op2 { op: Op2::Sub }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinGlobalGlobal { + l_global: 0, + r_global: 1, + op: Op2::Sub + } + )); + } + + #[test] + fn test_fuse_stack_global() { + // `(a - b) / g`: leaf triple -> BinVarVar; the outer `/ g` (lhs on stack) + // -> BinStackGlobal. Div is non-commutative: lhs (stack) / rhs (g). + let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Sub }, + Opcode::LoadGlobalVar { off: 1 }, + Opcode::Op2 { op: Op2::Div }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 2); + assert!(matches!( + bc.code[0], + Opcode::BinVarVar { + l: 0, + r: 1, + op: Op2::Sub + } + )); + assert!(matches!( + bc.code[1], + Opcode::BinStackGlobal { + r_global: 1, + op: Op2::Div + } + )); + } + + #[test] + fn test_fuse_const_const_sub_order() { + // `5 - 2` from two distinct literals: fuses the two loads + Op2 into one + // BinConstConst that still computes literals[0] - literals[1] at run time + // (NOT compile-time folding -- the operands stay two separate literals). 
+ let mut bc = ByteCode { + literals: vec![5.0, 2.0], + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::LoadConstant { id: 1 }, + Opcode::Op2 { op: Op2::Sub }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinConstConst { + l: 0, + r: 1, + op: Op2::Sub + } + )); + } + + #[test] + fn test_fuse_const_const_div_order() { + // `10 / 4` from two distinct literals -> BinConstConst with l/r order. + let mut bc = ByteCode { + literals: vec![10.0, 4.0], + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::LoadConstant { id: 1 }, + Opcode::Op2 { op: Op2::Div }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 1); + assert!(matches!( + bc.code[0], + Opcode::BinConstConst { + l: 0, + r: 1, + op: Op2::Div + } + )); + } + + #[test] + fn test_fuse_global_leaf_assign_falls_through_to_stack_leaf() { + // `dst = g - v`: post-peephole this is LoadGlobalVar; LoadVar; + // BinOpAssignCurr. There is deliberately NO global *leaf-assign* opcode + // (the global forms are pushing-only), so the 3-window does not collapse + // it to a single op. The standalone LoadGlobalVar remains and the 2-window + // folds the rhs+store into AssignStackVarCurr with the global as the + // stack lhs (3->2, still a dispatch saved, and correct). 
+ let mut bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadGlobalVar { off: 0 }, + Opcode::LoadVar { off: 7 }, + Opcode::BinOpAssignCurr { + op: Op2::Sub, + off: 9, + }, + ], + }; + bc.fuse_three_address(); + assert_eq!(bc.code.len(), 2); + assert!(matches!(bc.code[0], Opcode::LoadGlobalVar { off: 0 })); + assert!(matches!( + bc.code[1], + Opcode::AssignStackVarCurr { + dst: 9, + b: 7, + op: Op2::Sub + } + )); + } + + #[test] + fn test_fuse_global_const_const_preserve_max_stack_depth() { + // Each new pushing form has the same net stack effect as the Load;Load;Op2 + // it replaces (+1), so fusion can only keep or shrink the peak depth -- + // the Stack-safety proof must survive. Mix global, const-const, and + // stack-global forms. + let mut bc = ByteCode { + literals: vec![2.0, 3.0], + code: vec![ + // (g - v) - here a pushing subexpr + Opcode::LoadGlobalVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Sub }, + // ... / g (lhs on stack -> BinStackGlobal) + Opcode::LoadGlobalVar { off: 1 }, + Opcode::Op2 { op: Op2::Div }, + // + (2 - 3) (const-const pushing -> BinConstConst, peak +1) + Opcode::LoadConstant { id: 0 }, + Opcode::LoadConstant { id: 1 }, + Opcode::Op2 { op: Op2::Sub }, + Opcode::Op2 { op: Op2::Add }, + Opcode::AssignCurr { off: 5 }, + ], + }; + let before = bc.max_stack_depth(); + bc.fuse_three_address(); + assert!(bc.max_stack_depth() <= before); + } } /// A single variable's compiled initial-value bytecode, along with the diff --git a/src/simlin-engine/src/compiler/symbolic.rs b/src/simlin-engine/src/compiler/symbolic.rs index 66361f621..116cf8e0b 100644 --- a/src/simlin-engine/src/compiler/symbolic.rs +++ b/src/simlin-engine/src/compiler/symbolic.rs @@ -518,16 +518,55 @@ pub(crate) fn symbolize_opcode( op: *op, var: rmap.lookup(u32::from(*off))?, }), - // The 3-address fused binops are created by `ByteCode::fuse_three_address`, - // which runs only on FINAL concrete bytecode (after `resolve`), strictly - // after 
symbolization. They therefore never reach this function; seeing + // The 3-address fused binops AND the fused leaf assignments are created + // by `ByteCode::fuse_three_address`, which runs only on FINAL concrete + // bytecode (after `resolve`), strictly after symbolization, and only on + // the Vm's private execution copy (never the salsa-cached + // CompiledSimulation). They therefore never reach this function; seeing // one means the fusion ran before symbolize, which is a compiler bug. + // The exhaustive match here is the guarantee no fused opcode can silently + // leak into the symbolic/incremental layer. Opcode::BinVarVar { .. } | Opcode::BinVarConst { .. } | Opcode::BinConstVar { .. } | Opcode::BinStackVar { .. } - | Opcode::BinStackConst { .. } => { - unreachable!("3-address fused binop reached symbolize_opcode") + | Opcode::BinStackConst { .. } + | Opcode::BinGlobalVar { .. } + | Opcode::BinVarGlobal { .. } + | Opcode::BinGlobalConst { .. } + | Opcode::BinConstGlobal { .. } + | Opcode::BinGlobalGlobal { .. } + | Opcode::BinStackGlobal { .. } + | Opcode::BinConstConst { .. } + | Opcode::AssignAddVarVarCurr { .. } + | Opcode::AssignSubVarVarCurr { .. } + | Opcode::AssignMulVarVarCurr { .. } + | Opcode::AssignDivVarVarCurr { .. } + | Opcode::AssignAddVarVarNext { .. } + | Opcode::AssignSubVarVarNext { .. } + | Opcode::AssignMulVarVarNext { .. } + | Opcode::AssignDivVarVarNext { .. } + | Opcode::AssignAddVarConstCurr { .. } + | Opcode::AssignSubVarConstCurr { .. } + | Opcode::AssignMulVarConstCurr { .. } + | Opcode::AssignDivVarConstCurr { .. } + | Opcode::AssignAddVarConstNext { .. } + | Opcode::AssignSubVarConstNext { .. } + | Opcode::AssignMulVarConstNext { .. } + | Opcode::AssignDivVarConstNext { .. } + | Opcode::AssignAddConstVarCurr { .. } + | Opcode::AssignSubConstVarCurr { .. } + | Opcode::AssignMulConstVarCurr { .. } + | Opcode::AssignDivConstVarCurr { .. } + | Opcode::AssignAddConstVarNext { .. } + | Opcode::AssignSubConstVarNext { .. 
} + | Opcode::AssignMulConstVarNext { .. } + | Opcode::AssignDivConstVarNext { .. } + | Opcode::AssignStackVarCurr { .. } + | Opcode::AssignStackVarNext { .. } + | Opcode::AssignStackConstCurr { .. } + | Opcode::AssignStackConstNext { .. } => { + unreachable!("3-address fused opcode reached symbolize_opcode") } Opcode::PushVarView { base_off, diff --git a/src/simlin-engine/src/vm.rs b/src/simlin-engine/src/vm.rs index dfe2ece23..5b3b40524 100644 --- a/src/simlin-engine/src/vm.rs +++ b/src/simlin-engine/src/vm.rs @@ -1495,6 +1495,209 @@ impl Vm { let rv = bytecode.literals[*r as usize]; stack.push(eval_op2(*op, lv, rv)); } + // === 3-ADDRESS BINARY OPS WITH GLOBAL OPERANDS (R2 extension) === + // A `_global` operand is read from `curr[g]` directly (an + // absolute global slot, NO module_off -- like LoadGlobalVar), + // while a plain var operand is `curr[module_off + v]` (like + // LoadVar). The operand order `l op r` matches the original load + // order (load-bearing for Sub/Div). + Opcode::BinGlobalVar { l_global, r, op } => { + let lv = curr[*l_global as usize]; + let rv = curr[module_off + *r as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + Opcode::BinVarGlobal { l, r_global, op } => { + let lv = curr[module_off + *l as usize]; + let rv = curr[*r_global as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + Opcode::BinGlobalConst { l_global, r, op } => { + let lv = curr[*l_global as usize]; + let rv = bytecode.literals[*r as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + Opcode::BinConstGlobal { l, r_global, op } => { + let lv = bytecode.literals[*l as usize]; + let rv = curr[*r_global as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + Opcode::BinGlobalGlobal { + l_global, + r_global, + op, + } => { + let lv = curr[*l_global as usize]; + let rv = curr[*r_global as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + Opcode::BinStackGlobal { r_global, op } => { + let lv = stack.pop(); + let rv = curr[*r_global as usize]; + stack.push(eval_op2(*op, lv, 
rv)); + } + // Two constant leaves: still evaluated at run time (the operands + // are two distinct interned literals, not a folded result). + Opcode::BinConstConst { l, r, op } => { + let lv = bytecode.literals[*l as usize]; + let rv = bytecode.literals[*r as usize]; + stack.push(eval_op2(*op, lv, rv)); + } + // === 3-ADDRESS FUSED LEAF ASSIGNMENTS (R2 extension) === + // `dst = a op b`, operands read straight from curr[]/literals, + // result written straight to curr[]/next[]. The operator is in + // the opcode tag, so each arm is one straight-line f64 op with no + // `eval_op2` branch. The stack is untouched (the original + // sequence pushed two operands and popped both into the store), + // so it must already be empty -- the same invariant the + // BinOpAssign* arms assert. + Opcode::AssignAddVarVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] + curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubVarVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] - curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulVarVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] * curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivVarVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] / curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignAddVarVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] + curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubVarVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] - curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulVarVarNext { l, r, dst } => { + 
next[module_off + *dst as usize] = + curr[module_off + *l as usize] * curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivVarVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] / curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignAddVarConstCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] + bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubVarConstCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] - bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulVarConstCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] * bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivVarConstCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + curr[module_off + *l as usize] / bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignAddVarConstNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] + bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubVarConstNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] - bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulVarConstNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] * bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivVarConstNext { l, r, dst } => { + next[module_off + *dst as usize] = + curr[module_off + *l as usize] / bytecode.literals[*r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignAddConstVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + 
bytecode.literals[*l as usize] + curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubConstVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + bytecode.literals[*l as usize] - curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulConstVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + bytecode.literals[*l as usize] * curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivConstVarCurr { l, r, dst } => { + curr[module_off + *dst as usize] = + bytecode.literals[*l as usize] / curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignAddConstVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + bytecode.literals[*l as usize] + curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignSubConstVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + bytecode.literals[*l as usize] - curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignMulConstVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + bytecode.literals[*l as usize] * curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignDivConstVarNext { l, r, dst } => { + next[module_off + *dst as usize] = + bytecode.literals[*l as usize] / curr[module_off + *r as usize]; + debug_assert_eq!(0, stack.len()); + } + // === 2-ADDRESS STACK-LEAF FUSED ASSIGNMENTS (R2 extension) === + // `dst = lhs op b`: pop the pre-existing lhs from the stack, + // combine it with the leaf rhs (operator in payload), store. The + // pop leaves the stack empty, matching the BinOpAssign* invariant. 
+ Opcode::AssignStackVarCurr { dst, b, op } => { + let lhs = stack.pop(); + let rhs = curr[module_off + *b as usize]; + curr[module_off + *dst as usize] = eval_op2(*op, lhs, rhs); + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignStackVarNext { dst, b, op } => { + let lhs = stack.pop(); + let rhs = curr[module_off + *b as usize]; + next[module_off + *dst as usize] = eval_op2(*op, lhs, rhs); + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignStackConstCurr { dst, b, op } => { + let lhs = stack.pop(); + let rhs = bytecode.literals[*b as usize]; + curr[module_off + *dst as usize] = eval_op2(*op, lhs, rhs); + debug_assert_eq!(0, stack.len()); + } + Opcode::AssignStackConstNext { dst, b, op } => { + let lhs = stack.pop(); + let rhs = bytecode.literals[*b as usize]; + next[module_off + *dst as usize] = eval_op2(*op, lhs, rhs); + debug_assert_eq!(0, stack.len()); + } Opcode::Apply { func } => { let time = curr[TIME_OFF]; let dt = curr[DT_OFF]; @@ -3484,9 +3687,12 @@ mod vm_reset_and_run_initials_tests { /// Vm's flow/stock bytecode at construction. Uses subtraction and division /// (non-commutative) so a swapped operand encoding in any fused form is a /// loud failure rather than a silent miscompile. `a`, `b`, `c` are distinct - /// variables (not foldable into a literal), so each expression compiles to - /// loads + Op2 that the pass folds into BinVarVar / BinVarConst / - /// BinConstVar / BinStackVar / BinStackConst. + /// variables (not foldable into a literal). + /// + /// Post-peephole + post-fusion, a *top-level* leaf binop assignment `x = a + /// op b` collapses all the way to one register-style leaf-assign op (the R2 + /// extension), while the inner subexpression of a nested expression stays a + /// pushing `BinVarVar` whose result the outer op consumes from the stack. 
#[test] fn test_fused_binops_preserve_operand_order() { let tp = TestProject::new("fusion_order") @@ -3494,12 +3700,12 @@ mod vm_reset_and_run_initials_tests { .aux("a", "20", None) .aux("b", "5", None) .aux("c", "2", None) - .aux("vv", "a - b", None) // BinVarVar - .aux("dvv", "a / b", None) // BinVarVar, division - .aux("vc", "a - 3", None) // BinVarConst - .aux("cv", "10 - a", None) // BinConstVar - .aux("sv", "(a - b) - c", None) // BinVarVar then BinStackVar - .aux("sc", "(a - b) - 4", None); // BinVarVar then BinStackConst + .aux("vv", "a - b", None) // AssignSubVarVarCurr (leaf assign) + .aux("dvv", "a / b", None) // AssignDivVarVarCurr (leaf assign, division) + .aux("vc", "a - 3", None) // AssignSubVarConstCurr (leaf assign) + .aux("cv", "10 - a", None) // AssignSubConstVarCurr (leaf assign) + .aux("sv", "(a - b) - c", None) // BinVarVar then AssignStackVarCurr + .aux("sc", "(a - b) - 4", None); // BinVarVar then AssignStackConstCurr let compiled = build_compiled(&tp); let mut vm = Vm::new(compiled).unwrap(); @@ -3522,6 +3728,157 @@ mod vm_reset_and_run_initials_tests { assert_eq!(val("sc"), 11.0, "(a - b) - 4"); } + /// End-to-end guard for the global-operand and two-constant fused binops + /// (the R2 extension capturing the leaf-operand loads the original fusion + /// missed). All operators are Sub or Div (non-commutative) so a swapped + /// operand encoding in any new fused form is a loud failure, not a silent + /// miscompile. Each global appears as a leaf operand of a *pushing* + /// subexpression (the outer `- c` is the assigning op) so the fusion emits + /// the pushing `BinGlobal*` / `BinConstConst` forms rather than a leaf + /// assign. The opcode each equation produces is pinned by a sibling + /// `bytecode_profile().fused_histogram` assertion so the test cannot pass + /// vacuously through the un-fused path. + /// + /// At step 0: TIME=10, DT=2 (the sim start time and dt -- both globals, + /// loaded via LoadGlobalVar), b=5, c=2, e=1. 
+ #[test] + fn test_fused_global_and_const_binops_preserve_operand_order() { + let tp = TestProject::new("global_fusion_order") + .with_sim_time(10.0, 20.0, 2.0) + .aux("b", "5", None) + .aux("c", "2", None) + .aux("e", "1", None) + .aux("gv", "(TIME - b) - c", None) // BinGlobalVar(Sub) then stack-leaf + .aux("vg", "(b / DT) - c", None) // BinVarGlobal(Div) + .aux("gc", "(TIME - 3) - c", None) // BinGlobalConst(Sub) + .aux("cg", "(3 / DT) - c", None) // BinConstGlobal(Div) + .aux("gg", "(TIME - DT) - c", None) // BinGlobalGlobal(Sub) + .aux("cc", "(7 - 3) - c", None) // BinConstConst(Sub) + .aux("sg", "((b - c) / DT) - e", None); // BinVarVar then BinStackGlobal(Div) + + let compiled = build_compiled(&tp); + + // Pin the fused shape: every new form must actually be emitted, so the + // numeric asserts below exercise the fused dispatch arms (not a fallback). + let fused = &compiled.bytecode_profile().fused_histogram; + for name in [ + "BinGlobalVar", + "BinVarGlobal", + "BinGlobalConst", + "BinConstGlobal", + "BinGlobalGlobal", + "BinConstConst", + "BinStackGlobal", + ] { + assert!( + fused.get(name).copied().unwrap_or(0) >= 1, + "expected at least one {name} in the fused stream; got {fused:?}" + ); + } + + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let results = vm.into_results(); + let val = |name: &str| -> f64 { + let off = *results + .offsets + .get(&*canonicalize(name)) + .unwrap_or_else(|| panic!("missing {name}")); + results.data[off] // step 0 + }; + + assert_eq!(val("gv"), 3.0, "(TIME - b) - c = (10 - 5) - 2"); + assert_eq!(val("vg"), 0.5, "(b / DT) - c = (5 / 2) - 2"); + assert_eq!(val("gc"), 5.0, "(TIME - 3) - c = (10 - 3) - 2"); + assert_eq!(val("cg"), -0.5, "(3 / DT) - c = (3 / 2) - 2"); + assert_eq!(val("gg"), 6.0, "(TIME - DT) - c = (10 - 2) - 2"); + assert_eq!(val("cc"), 2.0, "(7 - 3) - c = (7 - 3) - 2"); + assert_eq!(val("sg"), 0.5, "((b - c) / DT) - e = ((5 - 2) / 2) - 1"); + } + + /// The single most dangerous risk for 
the global-operand fused binops: a + /// `_global` operand MUST read `curr[g]` (an absolute global slot), NOT + /// `curr[module_off + g]`. They are the same slot only at the root + /// (`module_off == 0`); inside a submodule they differ, so a `module_off + g` + /// miscompile is invisible at the root but corrupts every submodule. + /// + /// This builds a real two-model project: `main` instantiates `sub`, whose + /// body computes `gv = (TIME - b) - c` (a `BinGlobalVar`) and + /// `vg = (b / DT) - c` (a `BinVarGlobal`) where `module_off > 0`. With + /// TIME=10, DT=2 (the sim start/dt globals at curr[0]/curr[1]) and the + /// submodule's own b=5, c=2, the correct values are gv=3, vg=0.5. The + /// submodule's slots 0/1 hold its own variables (curr[module_off+0/1]), whose + /// values differ from the globals, so a `module_off`-relative read would + /// produce different numbers. + #[test] + fn test_global_operand_binop_reads_global_not_module_relative() { + use crate::datamodel; + use crate::db::{SimlinDb, compile_project_incremental, sync_from_datamodel_incremental}; + use crate::testutils::{x_aux, x_model, x_module, x_project}; + + let sim_specs = datamodel::SimSpecs { + start: 10.0, + stop: 12.0, + dt: datamodel::Dt::Dt(2.0), + save_step: None, + sim_method: datamodel::SimMethod::Euler, + time_units: None, + }; + + // `sub` is instantiated by `main`, so its variables live at module_off>0. + // `b`/`c` are plain auxes (module_off-relative LoadVar operands); TIME/DT + // are globals (LoadGlobalVar operands at curr[0]/curr[1]). 
+ let main = x_model("main", vec![x_module("sub", &[], None)]); + let sub = x_model( + "sub", + vec![ + x_aux("b", "5", None), + x_aux("c", "2", None), + x_aux("gv", "(TIME - b) - c", None), + x_aux("vg", "(b / DT) - c", None), + ], + ); + let datamodel = x_project(sim_specs, &[main, sub]); + + let mut db = SimlinDb::default(); + let sync = sync_from_datamodel_incremental(&mut db, &datamodel, None); + let compiled = compile_project_incremental(&db, sync.project, "main") + .expect("two-model project should compile"); + + // Confirm the fused global ops were actually emitted (inside the + // submodule), so the dispatch path under test is exercised. + let fused = &compiled.bytecode_profile().fused_histogram; + assert!( + fused.get("BinGlobalVar").copied().unwrap_or(0) >= 1, + "expected a BinGlobalVar in the submodule's fused stream; got {fused:?}" + ); + assert!( + fused.get("BinVarGlobal").copied().unwrap_or(0) >= 1, + "expected a BinVarGlobal in the submodule's fused stream; got {fused:?}" + ); + + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let results = vm.into_results(); + let series = crate::test_common::collect_results(&results); + + // Submodule variables are reported under their qualified (middot-joined) + // `sub·` names. A module_off-relative read of TIME/DT would yield + // other numbers. 
+ let gv = series + .get("sub\u{b7}gv") + .unwrap_or_else(|| panic!("missing sub\u{b7}gv; have {:?}", series.keys())); + let vg = series.get("sub\u{b7}vg").expect("missing sub\u{b7}vg"); + assert_eq!( + gv[0], 3.0, + "(TIME - b) - c = (10 - 5) - 2 -- TIME must be the global, not curr[module_off]" + ); + assert_eq!( + vg[0], 0.5, + "(b / DT) - c = (5 / 2) - 2 -- DT must be the global, not curr[module_off+1]" + ); + } + #[test] fn test_vm_reset_produces_identical_results() { let tp = pop_model(); @@ -3845,543 +4202,8 @@ mod vm_reset_and_run_initials_tests { } #[cfg(test)] -mod set_value_tests { - use super::*; - use crate::test_common::TestProject; - - /// Model: rate=0.1, scaled_rate=rate*10, stock initial=scaled_rate. - /// `rate` and `scaled_rate` are both stock dependencies in the initials. - fn rate_model() -> TestProject { - TestProject::new("rate_model") - .with_sim_time(0.0, 10.0, 1.0) - .aux("rate", "0.1", None) - .aux("scaled_rate", "rate * 10", None) - .flow("inflow", "population * rate", None) - .flow("outflow", "population / 80", None) - .stock("population", "scaled_rate", &["inflow"], &["outflow"], None) - } - - fn build_compiled(tp: &TestProject) -> CompiledSimulation { - tp.compile_incremental() - .expect("incremental compile should succeed") - } - - #[test] - fn test_override_constant_flows_through_dependent_initials() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - - // Override rate from 0.1 to 0.2 - vm.set_value(&Ident::new("rate"), 0.2).unwrap(); - vm.run_initials().unwrap(); - - let rate_off = vm.get_offset(&Ident::new("rate")).unwrap(); - let sr_off = vm.get_offset(&Ident::new("scaled_rate")).unwrap(); - let pop_off = vm.get_offset(&Ident::new("population")).unwrap(); - - assert_eq!( - vm.get_value_now(rate_off), - 0.2, - "rate should be overridden to 0.2" - ); - assert_eq!( - vm.get_value_now(sr_off), - 2.0, - "scaled_rate = rate*10 = 0.2*10 = 2.0" - ); - assert_eq!( - vm.get_value_now(pop_off), - 
2.0, - "population initial = scaled_rate = 2.0" - ); - } - - #[test] - fn test_override_affects_simulation_results() { - let compiled = build_compiled(&rate_model()); - - // Run without override - let mut vm1 = Vm::new(compiled.clone()).unwrap(); - vm1.run_to_end().unwrap(); - let series1 = vm1.get_series(&Ident::new("population")).unwrap(); - - // Run with override: higher rate means more growth - let mut vm2 = Vm::new(compiled).unwrap(); - vm2.set_value(&Ident::new("rate"), 0.2).unwrap(); - vm2.run_to_end().unwrap(); - let series2 = vm2.get_series(&Ident::new("population")).unwrap(); - - assert!( - series2.last().unwrap() > series1.last().unwrap(), - "higher rate should produce higher final population: {} vs {}", - series2.last().unwrap(), - series1.last().unwrap() - ); - - // Verify the override affects flows: rate should be 0.2 throughout - let rate_series = vm2.get_series(&Ident::new("rate")).unwrap(); - for (i, &val) in rate_series.iter().enumerate() { - assert!( - (val - 0.2).abs() < 1e-10, - "rate should be 0.2 at every step, got {} at step {}", - val, - i - ); - } - } - - #[test] - fn test_override_persists_across_reset() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - - vm.set_value(&Ident::new("rate"), 0.2).unwrap(); - vm.run_to_end().unwrap(); - let series_before = vm.get_series(&Ident::new("population")).unwrap(); - - vm.reset(); - vm.run_to_end().unwrap(); - let series_after = vm.get_series(&Ident::new("population")).unwrap(); - - for (i, (a, b)) in series_before.iter().zip(series_after.iter()).enumerate() { - assert!( - (a - b).abs() < 1e-10, - "override should persist across reset: step {i}: {a} vs {b}" - ); - } - } - - #[test] - fn test_clear_values_restores_defaults() { - let compiled = build_compiled(&rate_model()); - - // Baseline run - let mut vm_baseline = Vm::new(compiled.clone()).unwrap(); - vm_baseline.run_to_end().unwrap(); - let baseline = 
vm_baseline.get_series(&Ident::new("population")).unwrap(); - - // Run with override - let mut vm = Vm::new(compiled).unwrap(); - vm.set_value(&Ident::new("rate"), 0.5).unwrap(); - vm.run_to_end().unwrap(); - let overridden = vm.get_series(&Ident::new("population")).unwrap(); - - // Clear and re-run - vm.clear_values(); - vm.reset(); - vm.run_to_end().unwrap(); - let restored = vm.get_series(&Ident::new("population")).unwrap(); - - // Overridden should differ from baseline - assert!( - (overridden.last().unwrap() - baseline.last().unwrap()).abs() > 1.0, - "overridden should differ from baseline" - ); - // Restored should match baseline - for (i, (b, r)) in baseline.iter().zip(restored.iter()).enumerate() { - assert!( - (b - r).abs() < 1e-10, - "after clear_values, should match baseline: step {i}: {b} vs {r}" - ); - } - } - - #[test] - fn test_multiple_reset_set_value_cycles() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - - let mut prev_final = 0.0; - for i in 1..=10 { - let rate_val = i as f64 * 0.01; - vm.set_value(&Ident::new("rate"), rate_val).unwrap(); - vm.reset(); - vm.run_to_end().unwrap(); - let series = vm.get_series(&Ident::new("population")).unwrap(); - let final_val = *series.last().unwrap(); - if i > 1 { - assert!( - final_val > prev_final, - "final pop should increase with rate: rate={rate_val}, final={final_val}, prev={prev_final}" - ); - } - prev_final = final_val; - } - } - - #[test] - fn test_override_nonexistent_variable_returns_error() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - let result = vm.set_value(&Ident::new("nonexistent_var"), 1.0); - assert!( - result.is_err(), - "overriding nonexistent variable should fail" - ); - } - - #[test] - fn test_set_value_returns_correct_offset() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - let rate_ident = Ident::new("rate"); - - let expected_off = 
vm.get_offset(&rate_ident).unwrap(); - let returned_off = vm.set_value(&rate_ident, 0.5).unwrap(); - assert_eq!( - returned_off, expected_off, - "set_value should return the data-buffer offset of the variable" - ); - } - - #[test] - fn test_override_by_offset_out_of_bounds_returns_error() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - let err = vm.set_value_by_offset(99999, 1.0).unwrap_err(); - assert_eq!(err.code, crate::common::ErrorCode::BadOverride); - } - - #[test] - fn test_set_value_non_constant_variable_returns_error() { - // `births = pop * birth_rate` is a computed flow, not a simple constant - let tp = TestProject::new("non_constant_override") - .with_sim_time(0.0, 10.0, 1.0) - .aux("birth_rate", "0.1", None) - .flow("births", "pop * birth_rate", None) - .stock("pop", "100", &["births"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - // birth_rate IS a simple constant, so set_value should succeed - vm.set_value(&Ident::new("birth_rate"), 0.5).unwrap(); - - // births is a computed flow (not a constant), so set_value should fail - let err = vm.set_value(&Ident::new("births"), 42.0).unwrap_err(); - assert_eq!(err.code, crate::common::ErrorCode::BadOverride); - } - - #[test] - fn test_set_value_non_constant_returns_error() { - let tp = TestProject::new("non_constant_set") - .with_sim_time(0.0, 10.0, 1.0) - .aux("rate", "0.1", None) - .aux("computed", "rate * 10", None) - .flow("inflow", "pop * rate", None) - .stock("pop", "100", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - // "computed" depends on "rate", so it's not a simple constant - let err = vm.set_value(&Ident::new("computed"), 5.0).unwrap_err(); - assert_eq!(err.code, crate::common::ErrorCode::BadOverride); - - // Stocks also cannot be set via set_value - let err = vm.set_value(&Ident::new("pop"), 
500.0).unwrap_err(); - assert_eq!(err.code, crate::common::ErrorCode::BadOverride); - } - - #[test] - fn test_set_value_after_initials_affects_flows_but_not_stock_initials() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - - // Run initials first (stock initial = scaled_rate = rate*10 = 1.0) - vm.run_initials().unwrap(); - - // Set value AFTER initials - vm.set_value(&Ident::new("rate"), 0.5).unwrap(); - - // The stock initial is already set (from rate=0.1), but flows will use rate=0.5 - vm.run_to_end().unwrap(); - let series1 = vm.get_series(&Ident::new("population")).unwrap(); - - // Now reset and run - BOTH initials and flows use rate=0.5 - vm.reset(); - vm.run_to_end().unwrap(); - let series2 = vm.get_series(&Ident::new("population")).unwrap(); - - // series1 used rate=0.1 for initials but rate=0.5 for flows - // series2 used rate=0.5 for both - // They should differ (different initial stock values) - assert!( - (series1[0] - series2[0]).abs() > 0.1, - "initial stock values should differ: first={}, second={}", - series1[0], - series2[0] - ); - } - - #[test] - fn test_conflicting_writes_to_same_offset() { - let compiled = build_compiled(&rate_model()); - let mut vm = Vm::new(compiled).unwrap(); - - let rate_off = vm.get_offset(&Ident::new("rate")).unwrap(); - - // Two writes to the same offset - last one wins - vm.set_value_by_offset(rate_off, 0.1).unwrap(); - vm.set_value_by_offset(rate_off, 0.3).unwrap(); - - vm.run_initials().unwrap(); - assert_eq!(vm.get_value_now(rate_off), 0.3, "last override should win"); - } - - #[test] - fn test_set_value_module_stock_returns_error() { - let test_file = concat!( - env!("CARGO_MANIFEST_DIR"), - "/../../test/modules_hares_and_foxes/modules_hares_and_foxes.stmx" - ); - let file_bytes = - std::fs::read(test_file).expect("modules_hares_and_foxes test fixture must exist"); - let mut cursor = std::io::Cursor::new(file_bytes); - let project_datamodel = crate::open_xmile(&mut 
cursor).unwrap(); - let mut db = crate::db::SimlinDb::default(); - let sync = crate::db::sync_from_datamodel_incremental(&mut db, &project_datamodel, None); - let compiled = crate::db::compile_project_incremental(&db, sync.project, "main").unwrap(); - - let mut vm = Vm::new(compiled).unwrap(); - let hares_ident = Ident::new("hares.hares"); - assert!( - vm.get_offset(&hares_ident).is_some(), - "hares·hares should exist in offsets" - ); - // Stocks are not simple constants, so set_value should fail - let err = vm.set_value(&hares_ident, 500.0).unwrap_err(); - assert_eq!(err.code, crate::common::ErrorCode::BadOverride); - } - - #[test] - fn test_override_partial_array() { - let tp = TestProject::new("array_override") - .with_sim_time(0.0, 1.0, 1.0) - .named_dimension("Dim", &["A", "B", "C"]) - .array_with_ranges("arr[Dim]", vec![("A", "1"), ("B", "2"), ("C", "3")]) - .aux("total", "arr[A] + arr[B] + arr[C]", None) - .flow("inflow", "0", None) - .stock("s", "total", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - let arr_b_ident = Ident::new("arr[b]"); - let arr_b_off = vm - .get_offset(&arr_b_ident) - .expect("arr[b] should exist in offsets"); - vm.set_value_by_offset(arr_b_off, 99.0).unwrap(); - vm.run_initials().unwrap(); - assert_eq!( - vm.get_value_now(arr_b_off), - 99.0, - "arr[b] should be overridden to 99" - ); - let s_off = vm.get_offset(&Ident::new("s")).unwrap(); - // total = arr[A]+arr[B]+arr[C] = 1+99+3 = 103 - assert_eq!( - vm.get_value_now(s_off), - 103.0, - "stock should reflect overridden array element: 1+99+3=103" - ); - } - - #[test] - fn test_set_value_affects_flow_computation() { - // Model where birth_rate is ONLY used in flows (not in stock initial) - let tp = TestProject::new("flow_only_constant") - .with_sim_time(0.0, 10.0, 1.0) - .aux("birth_rate", "0.1", None) - .flow("births", "pop * birth_rate", None) - .stock("pop", "100", &["births"], &[], None); - - let 
compiled = tp.compile_incremental().unwrap(); - - // Run without override - let mut vm1 = Vm::new(compiled.clone()).unwrap(); - vm1.run_to_end().unwrap(); - let series1 = vm1.get_series(&Ident::new("pop")).unwrap(); - - // Run with override - let mut vm2 = Vm::new(compiled).unwrap(); - vm2.set_value(&Ident::new("birth_rate"), 0.5).unwrap(); - vm2.run_to_end().unwrap(); - let series2 = vm2.get_series(&Ident::new("pop")).unwrap(); - - // Higher birth_rate should produce higher final population - assert!( - series2.last().unwrap() > series1.last().unwrap(), - "higher birth_rate should produce higher final population: {} vs {}", - series2.last().unwrap(), - series1.last().unwrap() - ); - - // Verify birth_rate shows the overridden value - let br_series = vm2.get_series(&Ident::new("birth_rate")).unwrap(); - for (i, &val) in br_series.iter().enumerate() { - assert!( - (val - 0.5).abs() < 1e-10, - "birth_rate should be 0.5 at step {}, got {}", - i, - val - ); - } - } - - #[test] - fn test_override_does_not_corrupt_shared_literal() { - // Two constants with the same numeric value used to share an interned - // literal_id. Now they get distinct slots via push_named_literal. - // Overriding one must NOT affect the other. 
- let tp = TestProject::new("shared_literal") - .with_sim_time(0.0, 5.0, 1.0) - .aux("rate_a", "0.1", None) - .aux("rate_b", "0.1", None) - .aux("total_rate", "rate_a + rate_b", None) - .flow("inflow", "stock_val * total_rate", None) - .stock("stock_val", "100", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - // Override rate_a only, then run the full simulation - vm.set_value(&Ident::new("rate_a"), 0.5).unwrap(); - vm.run_to_end().unwrap(); - - let rate_a_series = vm.get_series(&Ident::new("rate_a")).unwrap(); - let rate_b_series = vm.get_series(&Ident::new("rate_b")).unwrap(); - - for (i, &val) in rate_a_series.iter().enumerate() { - assert!( - (val - 0.5).abs() < 1e-10, - "rate_a should be 0.5 at step {i}, got {val}" - ); - } - for (i, &val) in rate_b_series.iter().enumerate() { - assert!( - (val - 0.1).abs() < 1e-10, - "rate_b should remain 0.1 at step {i}, got {val} (must not be corrupted by rate_a override)" - ); - } - } - - #[test] - fn test_override_does_not_corrupt_expression_literal() { - // A constant and an expression both use the same numeric value 0.1. - // Overriding the constant must not corrupt the expression's literal. 
- let tp = TestProject::new("expr_literal") - .with_sim_time(0.0, 5.0, 1.0) - .aux("rate", "0.1", None) - .aux("scaled", "stock_val * 0.1", None) - .flow("inflow", "stock_val * rate + scaled", None) - .stock("stock_val", "100", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - vm.set_value(&Ident::new("rate"), 0.9).unwrap(); - vm.run_to_end().unwrap(); - - let scaled_series = vm.get_series(&Ident::new("scaled")).unwrap(); - // At t=0, stock_val = 100, so scaled = 100 * 0.1 = 10.0 - // If the literal 0.1 was corrupted to 0.9, scaled would be 90.0 - assert!( - (scaled_series[0] - 10.0).abs() < 1e-10, - "scaled should be 10.0 at t=0 (the 0.1 literal in the expression must not be corrupted), got {}", - scaled_series[0] - ); - } - - #[test] - fn test_same_valued_constants_get_distinct_literal_ids() { - // Two constants with the same numeric value should get distinct literal - // slots in their AssignConstCurr opcodes (via push_named_literal). - let tp = TestProject::new("distinct_lits") - .with_sim_time(0.0, 1.0, 1.0) - .aux("rate_a", "0.1", None) - .aux("rate_b", "0.1", None) - .flow("inflow", "rate_a + rate_b", None) - .stock("s", "0", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let root_module = &compiled.modules[&compiled.root]; - - // Collect all AssignConstCurr literal_ids from the flows bytecode. - let assign_const_lits: Vec = root_module - .compiled_flows - .code - .iter() - .filter_map(|op| { - if let Opcode::AssignConstCurr { literal_id, .. } = op { - Some(*literal_id) - } else { - None - } - }) - .collect(); - - // rate_a and rate_b each get their own AssignConstCurr with distinct literal_ids. - assert!( - assign_const_lits.len() >= 2, - "expected at least 2 AssignConstCurr opcodes, got {}", - assign_const_lits.len() - ); - // All literal_ids should be unique (no sharing). 
- let unique: std::collections::HashSet = assign_const_lits.iter().copied().collect(); - assert_eq!( - unique.len(), - assign_const_lits.len(), - "literal_ids should all be distinct, got {:?}", - assign_const_lits - ); - } - - #[test] - fn test_override_shared_literal_clear_restores_both() { - let tp = TestProject::new("shared_clear") - .with_sim_time(0.0, 5.0, 1.0) - .aux("rate_a", "0.1", None) - .aux("rate_b", "0.1", None) - .flow("inflow", "rate_a + rate_b", None) - .stock("s", "rate_a + rate_b", &["inflow"], &[], None); - - let compiled = tp.compile_incremental().unwrap(); - let mut vm = Vm::new(compiled).unwrap(); - - vm.set_value(&Ident::new("rate_a"), 0.5).unwrap(); - vm.run_to_end().unwrap(); - - let rate_a_series = vm.get_series(&Ident::new("rate_a")).unwrap(); - let rate_b_series = vm.get_series(&Ident::new("rate_b")).unwrap(); - assert!( - (rate_a_series[0] - 0.5).abs() < 1e-10, - "rate_a should be 0.5" - ); - assert!( - (rate_b_series[0] - 0.1).abs() < 1e-10, - "rate_b should be 0.1" - ); - - // Clear and re-run - vm.clear_values(); - vm.reset(); - vm.run_to_end().unwrap(); - - let rate_a_restored = vm.get_series(&Ident::new("rate_a")).unwrap(); - let rate_b_restored = vm.get_series(&Ident::new("rate_b")).unwrap(); - assert!( - (rate_a_restored[0] - 0.1).abs() < 1e-10, - "rate_a should be restored to 0.1, got {}", - rate_a_restored[0] - ); - assert!( - (rate_b_restored[0] - 0.1).abs() < 1e-10, - "rate_b should still be 0.1, got {}", - rate_b_restored[0] - ); - } -} +#[path = "vm_set_value_tests.rs"] +mod set_value_tests; #[cfg(test)] mod stack_tests { @@ -4535,10 +4357,17 @@ mod superinstruction_tests { // ----------------------------------------------------------------------- // BinOpAssignCurr: e.g. 
`births = population * birth_rate` + // + // The peephole pass folds `Op2(Mul); AssignCurr` into `BinOpAssignCurr`, but + // the later 3-address fusion (R2 extension) -- which runs on the Vm's + // execution copy that `flow_opcodes` reads -- folds the whole leaf assign + // `result = rate * 2` (a var times a const) into the register-style + // `AssignMulVarConstCurr`. So `BinOpAssignCurr` is the *intermediate* form; + // the fused stream carries the operator-specialized leaf assign instead. // ----------------------------------------------------------------------- #[test] - fn test_binop_assign_curr_present_in_bytecode() { + fn test_binop_assign_curr_fuses_to_leaf_assign() { let tp = TestProject::new("binop_model") .with_sim_time(0.0, 1.0, 1.0) .aux("rate", "0.1", None) @@ -4548,12 +4377,21 @@ mod superinstruction_tests { let vm = build_vm(&tp); let ops = flow_opcodes(&vm); - let has_binop_curr = ops + let has_leaf_mul = ops .iter() - .any(|op| matches!(op, Opcode::BinOpAssignCurr { .. })); + .any(|op| matches!(op, Opcode::AssignMulVarConstCurr { .. })); assert!( - has_binop_curr, - "binary operation with assign should produce BinOpAssignCurr" + has_leaf_mul, + "`result = rate * 2` should fuse to AssignMulVarConstCurr in the \ + execution stream, got {:?}", + ops.iter().map(|o| o.name()).collect::>() + ); + // The intermediate `BinOpAssignCurr` must NOT survive into the fused + // stream for an {Add,Sub,Mul,Div} leaf assign. + assert!( + !ops.iter() + .any(|op| matches!(op, Opcode::BinOpAssignCurr { .. 
})), + "BinOpAssignCurr should have been superseded by the leaf-assign form" ); } @@ -4975,6 +4813,114 @@ mod superinstruction_tests { vm_s[3] ); } + + // ----------------------------------------------------------------------- + // R2 extension: fused leaf assignments (3->1) and stack-leaf assigns (2->1) + // ----------------------------------------------------------------------- + + /// Asserts the *actual fused opcodes* of the leaf-assign and stack-leaf-assign + /// forms, not just the numeric result. The fused stream lives on the Vm's + /// `sliced_sim` execution copy (the one `flow_opcodes` reads). Sub/Div are + /// non-commutative, so a swapped `l`/`r` encoding would also change the result. + #[test] + fn test_fused_leaf_assign_opcodes_present_in_flow_bytecode() { + let tp = TestProject::new("leaf_assign_opcodes") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "20", None) + .aux("b", "5", None) + .aux("c", "2", None) + .aux("vv", "a - b", None) + .aux("dvv", "a / b", None) + .aux("vc", "a - 3", None) + .aux("cv", "10 - a", None) + .aux("sv", "(a - b) - c", None) + .aux("scn", "(a / b) / 2", None); + + let vm = build_vm(&tp); + let ops = flow_opcodes(&vm); + + // Leaf-assign forms (3->1) for the top-level binops. + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignSubVarVarCurr { .. })), + "`vv = a - b` should fuse to AssignSubVarVarCurr" + ); + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignDivVarVarCurr { .. })), + "`dvv = a / b` should fuse to AssignDivVarVarCurr" + ); + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignSubVarConstCurr { .. })), + "`vc = a - 3` should fuse to AssignSubVarConstCurr" + ); + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignSubConstVarCurr { .. })), + "`cv = 10 - a` should fuse to AssignSubConstVarCurr" + ); + // Stack-leaf forms (2->1) for the outer op of a nested expression. + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignStackVarCurr { op: Op2::Sub, .. 
})), + "`sv = (a - b) - c` outer op should fuse to AssignStackVarCurr" + ); + assert!( + ops.iter() + .any(|op| matches!(op, Opcode::AssignStackConstCurr { op: Op2::Div, .. })), + "`scn = (a / b) / 2` outer op should fuse to AssignStackConstCurr" + ); + + let res = tp.run_vm().unwrap(); + assert_eq!(res["vv"][0], 15.0, "a - b"); + assert_eq!(res["dvv"][0], 4.0, "a / b"); + assert_eq!(res["vc"][0], 17.0, "a - 3"); + assert_eq!(res["cv"][0], -10.0, "10 - a"); + assert_eq!(res["sv"][0], 13.0, "(a - b) - c"); + // Discriminating value for stack-leaf Div order: (20/5)/2 = 2, whereas a + // swapped 2/(20/5) would be 0.5. + assert_eq!(res["scn"][0], 2.0, "(a / b) / 2 = (20/5)/2 = 2"); + } + + /// Stock integration `stock' = stock + flow * dt` exercises the *Next* family + /// of fused assigns in the stock bytecode: guards that a Next variant writes + /// `next[]` (not `curr[]`) and the integration is numerically correct. + #[test] + fn test_fused_leaf_assign_next_in_stock_bytecode() { + let tp = TestProject::new("leaf_assign_next") + .with_sim_time(0.0, 2.0, 1.0) + .aux("inflow", "3", None) + .stock("s", "10", &["inflow"], &[], None); + + let vm = build_vm(&tp); + let ops = stock_opcodes(&vm); + // The update `s' = s + inflow*dt` produces some Next-family assign (the + // exact variant depends on emission order; this guards that *a* fused or + // unfused Next store is present and the result is exact). + let has_next_form = ops.iter().any(|op| { + matches!( + op, + Opcode::AssignStackVarNext { .. } + | Opcode::AssignStackConstNext { .. } + | Opcode::AssignAddVarVarNext { .. } + | Opcode::AssignAddConstVarNext { .. } + | Opcode::AssignAddVarConstNext { .. } + | Opcode::BinOpAssignNext { .. } + ) + }); + assert!( + has_next_form, + "stock integration should produce a Next-family assign, got {:?}", + ops.iter().map(|o| o.name()).collect::>() + ); + + // s(0)=10, s(1)=13, s(2)=16. 
+ let res = tp.run_vm().unwrap(); + assert_eq!(res["s"][0], 10.0); + assert_eq!(res["s"][1], 13.0); + assert_eq!(res["s"][2], 16.0); + } } #[cfg(test)] diff --git a/src/simlin-engine/src/vm_profile.rs b/src/simlin-engine/src/vm_profile.rs index 7674072d2..465dd7f45 100644 --- a/src/simlin-engine/src/vm_profile.rs +++ b/src/simlin-engine/src/vm_profile.rs @@ -32,9 +32,32 @@ impl CompiledSimulation { bc.code.len() }; + // Histogram-only tally with no `total_literals` side effect. The fused + // streams below are temporary clones of the same module bytecode and + // share its literal table (fusion rewrites opcodes, never the literal + // pool), which `tally` already counts on the real bytecode -- counting + // it again would double `total_literals`. + let tally_hist = |bc: &ByteCode, hist: &mut BTreeMap<&'static str, usize>| { + for op in bc.code.iter() { + *hist.entry(op.name()).or_insert(0) += 1; + } + }; + for module in self.modules.values() { p.flow_opcodes += tally(&module.compiled_flows, &mut p.histogram); - p.flow_opcodes_after_fusion += module.compiled_flows.estimate_fused_len(); + // Measure the post-fusion size by running the *actual* fusion pass on + // a clone (the pass is what the Vm applies at construction), rather + // than a separate estimate that could drift from the real pass. The + // fused histogram tallies the executed flow+stock stream as the Vm + // sees it, so the per-fused-opcode counts (e.g. how many BinGlobal* + // / BinConstConst sites fired) are observable. 
+ let mut fused = module.compiled_flows.as_ref().clone(); + fused.fuse_three_address(); + p.flow_opcodes_after_fusion += fused.code.len(); + tally_hist(&fused, &mut p.fused_histogram); + let mut fused_stocks = module.compiled_stocks.as_ref().clone(); + fused_stocks.fuse_three_address(); + tally_hist(&fused_stocks, &mut p.fused_histogram); p.stock_opcodes += tally(&module.compiled_stocks, &mut p.histogram); for ci in module.compiled_initials.iter() { p.n_initials += 1; @@ -83,4 +106,49 @@ pub struct BytecodeProfile { pub dim_lists: usize, pub names: usize, pub histogram: BTreeMap<&'static str, usize>, + /// Opcode histogram of the *post-fusion* flow+stock stream (the program the + /// Vm actually dispatches), so the count of each fused superinstruction + /// (BinVarVar, BinGlobal*, BinConstConst, ...) is directly observable. + pub fused_histogram: BTreeMap<&'static str, usize>, +} + +#[cfg(test)] +mod tests { + use crate::test_common::TestProject; + + /// Regression guard: `bytecode_profile` must count each compiled phase's + /// literal table exactly once. The fused-stream histogram is tallied from + /// temporary clones that share the real bytecode's literal pool, so it must + /// not contribute to `total_literals` -- otherwise every module's flow and + /// stock literals are counted twice. + #[test] + fn total_literals_excludes_fused_clone_tally() { + let compiled = TestProject::new("profile_literals") + .aux("rate", "0.5", None) + .stock("s", "100", &["inflow"], &[], None) + .flow("inflow", "s * rate + 3", None) + .compile_incremental() + .unwrap(); + + // Ground truth: each phase's literal table, counted once. 
+ let mut expected = 0usize; + for m in compiled.modules.values() { + expected += m.compiled_flows.literals.len(); + expected += m.compiled_stocks.literals.len(); + for ci in m.compiled_initials.iter() { + expected += ci.bytecode.literals.len(); + } + } + assert!( + expected > 0, + "test model should produce literals (else vacuous)" + ); + + assert_eq!( + compiled.bytecode_profile().total_literals, + expected, + "total_literals must count each phase's literal table once; the \ + fused-clone tally must not re-count the shared literal pool" + ); + } } diff --git a/src/simlin-engine/src/vm_set_value_tests.rs b/src/simlin-engine/src/vm_set_value_tests.rs new file mode 100644 index 000000000..8af5952f0 --- /dev/null +++ b/src/simlin-engine/src/vm_set_value_tests.rs @@ -0,0 +1,545 @@ +// Copyright 2026 The Simlin Authors. All rights reserved. +// Use of this source code is governed by the Apache License, +// Version 2.0, that can be found in the LICENSE file. + +//! `Vm::set_value` / constant-override tests. +//! +//! Split out of `vm.rs` as a `#[path]`-included child module (so `super::*` +//! still reaches the parent module internals) to keep `vm.rs` under the +//! per-file line cap. Behaviour is unchanged; this is a pure relocation. + +use super::*; +use crate::test_common::TestProject; + +/// Model: rate=0.1, scaled_rate=rate*10, stock initial=scaled_rate. +/// `rate` and `scaled_rate` are both stock dependencies in the initials. 
+fn rate_model() -> TestProject { + TestProject::new("rate_model") + .with_sim_time(0.0, 10.0, 1.0) + .aux("rate", "0.1", None) + .aux("scaled_rate", "rate * 10", None) + .flow("inflow", "population * rate", None) + .flow("outflow", "population / 80", None) + .stock("population", "scaled_rate", &["inflow"], &["outflow"], None) +} + +fn build_compiled(tp: &TestProject) -> CompiledSimulation { + tp.compile_incremental() + .expect("incremental compile should succeed") +} + +#[test] +fn test_override_constant_flows_through_dependent_initials() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + + // Override rate from 0.1 to 0.2 + vm.set_value(&Ident::new("rate"), 0.2).unwrap(); + vm.run_initials().unwrap(); + + let rate_off = vm.get_offset(&Ident::new("rate")).unwrap(); + let sr_off = vm.get_offset(&Ident::new("scaled_rate")).unwrap(); + let pop_off = vm.get_offset(&Ident::new("population")).unwrap(); + + assert_eq!( + vm.get_value_now(rate_off), + 0.2, + "rate should be overridden to 0.2" + ); + assert_eq!( + vm.get_value_now(sr_off), + 2.0, + "scaled_rate = rate*10 = 0.2*10 = 2.0" + ); + assert_eq!( + vm.get_value_now(pop_off), + 2.0, + "population initial = scaled_rate = 2.0" + ); +} + +#[test] +fn test_override_affects_simulation_results() { + let compiled = build_compiled(&rate_model()); + + // Run without override + let mut vm1 = Vm::new(compiled.clone()).unwrap(); + vm1.run_to_end().unwrap(); + let series1 = vm1.get_series(&Ident::new("population")).unwrap(); + + // Run with override: higher rate means more growth + let mut vm2 = Vm::new(compiled).unwrap(); + vm2.set_value(&Ident::new("rate"), 0.2).unwrap(); + vm2.run_to_end().unwrap(); + let series2 = vm2.get_series(&Ident::new("population")).unwrap(); + + assert!( + series2.last().unwrap() > series1.last().unwrap(), + "higher rate should produce higher final population: {} vs {}", + series2.last().unwrap(), + series1.last().unwrap() + ); + + // Verify the override 
affects flows: rate should be 0.2 throughout + let rate_series = vm2.get_series(&Ident::new("rate")).unwrap(); + for (i, &val) in rate_series.iter().enumerate() { + assert!( + (val - 0.2).abs() < 1e-10, + "rate should be 0.2 at every step, got {} at step {}", + val, + i + ); + } +} + +#[test] +fn test_override_persists_across_reset() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + + vm.set_value(&Ident::new("rate"), 0.2).unwrap(); + vm.run_to_end().unwrap(); + let series_before = vm.get_series(&Ident::new("population")).unwrap(); + + vm.reset(); + vm.run_to_end().unwrap(); + let series_after = vm.get_series(&Ident::new("population")).unwrap(); + + for (i, (a, b)) in series_before.iter().zip(series_after.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "override should persist across reset: step {i}: {a} vs {b}" + ); + } +} + +#[test] +fn test_clear_values_restores_defaults() { + let compiled = build_compiled(&rate_model()); + + // Baseline run + let mut vm_baseline = Vm::new(compiled.clone()).unwrap(); + vm_baseline.run_to_end().unwrap(); + let baseline = vm_baseline.get_series(&Ident::new("population")).unwrap(); + + // Run with override + let mut vm = Vm::new(compiled).unwrap(); + vm.set_value(&Ident::new("rate"), 0.5).unwrap(); + vm.run_to_end().unwrap(); + let overridden = vm.get_series(&Ident::new("population")).unwrap(); + + // Clear and re-run + vm.clear_values(); + vm.reset(); + vm.run_to_end().unwrap(); + let restored = vm.get_series(&Ident::new("population")).unwrap(); + + // Overridden should differ from baseline + assert!( + (overridden.last().unwrap() - baseline.last().unwrap()).abs() > 1.0, + "overridden should differ from baseline" + ); + // Restored should match baseline + for (i, (b, r)) in baseline.iter().zip(restored.iter()).enumerate() { + assert!( + (b - r).abs() < 1e-10, + "after clear_values, should match baseline: step {i}: {b} vs {r}" + ); + } +} + +#[test] +fn 
test_multiple_reset_set_value_cycles() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + + let mut prev_final = 0.0; + for i in 1..=10 { + let rate_val = i as f64 * 0.01; + vm.set_value(&Ident::new("rate"), rate_val).unwrap(); + vm.reset(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&Ident::new("population")).unwrap(); + let final_val = *series.last().unwrap(); + if i > 1 { + assert!( + final_val > prev_final, + "final pop should increase with rate: rate={rate_val}, final={final_val}, prev={prev_final}" + ); + } + prev_final = final_val; + } +} + +#[test] +fn test_override_nonexistent_variable_returns_error() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + let result = vm.set_value(&Ident::new("nonexistent_var"), 1.0); + assert!( + result.is_err(), + "overriding nonexistent variable should fail" + ); +} + +#[test] +fn test_set_value_returns_correct_offset() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + let rate_ident = Ident::new("rate"); + + let expected_off = vm.get_offset(&rate_ident).unwrap(); + let returned_off = vm.set_value(&rate_ident, 0.5).unwrap(); + assert_eq!( + returned_off, expected_off, + "set_value should return the data-buffer offset of the variable" + ); +} + +#[test] +fn test_override_by_offset_out_of_bounds_returns_error() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + let err = vm.set_value_by_offset(99999, 1.0).unwrap_err(); + assert_eq!(err.code, crate::common::ErrorCode::BadOverride); +} + +#[test] +fn test_set_value_non_constant_variable_returns_error() { + // `births = pop * birth_rate` is a computed flow, not a simple constant + let tp = TestProject::new("non_constant_override") + .with_sim_time(0.0, 10.0, 1.0) + .aux("birth_rate", "0.1", None) + .flow("births", "pop * birth_rate", None) + .stock("pop", "100", &["births"], &[], None); + + 
let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + // birth_rate IS a simple constant, so set_value should succeed + vm.set_value(&Ident::new("birth_rate"), 0.5).unwrap(); + + // births is a computed flow (not a constant), so set_value should fail + let err = vm.set_value(&Ident::new("births"), 42.0).unwrap_err(); + assert_eq!(err.code, crate::common::ErrorCode::BadOverride); +} + +#[test] +fn test_set_value_non_constant_returns_error() { + let tp = TestProject::new("non_constant_set") + .with_sim_time(0.0, 10.0, 1.0) + .aux("rate", "0.1", None) + .aux("computed", "rate * 10", None) + .flow("inflow", "pop * rate", None) + .stock("pop", "100", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + // "computed" depends on "rate", so it's not a simple constant + let err = vm.set_value(&Ident::new("computed"), 5.0).unwrap_err(); + assert_eq!(err.code, crate::common::ErrorCode::BadOverride); + + // Stocks also cannot be set via set_value + let err = vm.set_value(&Ident::new("pop"), 500.0).unwrap_err(); + assert_eq!(err.code, crate::common::ErrorCode::BadOverride); +} + +#[test] +fn test_set_value_after_initials_affects_flows_but_not_stock_initials() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + + // Run initials first (stock initial = scaled_rate = rate*10 = 1.0) + vm.run_initials().unwrap(); + + // Set value AFTER initials + vm.set_value(&Ident::new("rate"), 0.5).unwrap(); + + // The stock initial is already set (from rate=0.1), but flows will use rate=0.5 + vm.run_to_end().unwrap(); + let series1 = vm.get_series(&Ident::new("population")).unwrap(); + + // Now reset and run - BOTH initials and flows use rate=0.5 + vm.reset(); + vm.run_to_end().unwrap(); + let series2 = vm.get_series(&Ident::new("population")).unwrap(); + + // series1 used rate=0.1 for initials but rate=0.5 for flows + // series2 used 
rate=0.5 for both + // They should differ (different initial stock values) + assert!( + (series1[0] - series2[0]).abs() > 0.1, + "initial stock values should differ: first={}, second={}", + series1[0], + series2[0] + ); +} + +#[test] +fn test_conflicting_writes_to_same_offset() { + let compiled = build_compiled(&rate_model()); + let mut vm = Vm::new(compiled).unwrap(); + + let rate_off = vm.get_offset(&Ident::new("rate")).unwrap(); + + // Two writes to the same offset - last one wins + vm.set_value_by_offset(rate_off, 0.1).unwrap(); + vm.set_value_by_offset(rate_off, 0.3).unwrap(); + + vm.run_initials().unwrap(); + assert_eq!(vm.get_value_now(rate_off), 0.3, "last override should win"); +} + +#[test] +fn test_set_value_module_stock_returns_error() { + let test_file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../test/modules_hares_and_foxes/modules_hares_and_foxes.stmx" + ); + let file_bytes = + std::fs::read(test_file).expect("modules_hares_and_foxes test fixture must exist"); + let mut cursor = std::io::Cursor::new(file_bytes); + let project_datamodel = crate::open_xmile(&mut cursor).unwrap(); + let mut db = crate::db::SimlinDb::default(); + let sync = crate::db::sync_from_datamodel_incremental(&mut db, &project_datamodel, None); + let compiled = crate::db::compile_project_incremental(&db, sync.project, "main").unwrap(); + + let mut vm = Vm::new(compiled).unwrap(); + let hares_ident = Ident::new("hares.hares"); + assert!( + vm.get_offset(&hares_ident).is_some(), + "hares·hares should exist in offsets" + ); + // Stocks are not simple constants, so set_value should fail + let err = vm.set_value(&hares_ident, 500.0).unwrap_err(); + assert_eq!(err.code, crate::common::ErrorCode::BadOverride); +} + +#[test] +fn test_override_partial_array() { + let tp = TestProject::new("array_override") + .with_sim_time(0.0, 1.0, 1.0) + .named_dimension("Dim", &["A", "B", "C"]) + .array_with_ranges("arr[Dim]", vec![("A", "1"), ("B", "2"), ("C", "3")]) + .aux("total", "arr[A] + 
arr[B] + arr[C]", None) + .flow("inflow", "0", None) + .stock("s", "total", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + let arr_b_ident = Ident::new("arr[b]"); + let arr_b_off = vm + .get_offset(&arr_b_ident) + .expect("arr[b] should exist in offsets"); + vm.set_value_by_offset(arr_b_off, 99.0).unwrap(); + vm.run_initials().unwrap(); + assert_eq!( + vm.get_value_now(arr_b_off), + 99.0, + "arr[b] should be overridden to 99" + ); + let s_off = vm.get_offset(&Ident::new("s")).unwrap(); + // total = arr[A]+arr[B]+arr[C] = 1+99+3 = 103 + assert_eq!( + vm.get_value_now(s_off), + 103.0, + "stock should reflect overridden array element: 1+99+3=103" + ); +} + +#[test] +fn test_set_value_affects_flow_computation() { + // Model where birth_rate is ONLY used in flows (not in stock initial) + let tp = TestProject::new("flow_only_constant") + .with_sim_time(0.0, 10.0, 1.0) + .aux("birth_rate", "0.1", None) + .flow("births", "pop * birth_rate", None) + .stock("pop", "100", &["births"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + + // Run without override + let mut vm1 = Vm::new(compiled.clone()).unwrap(); + vm1.run_to_end().unwrap(); + let series1 = vm1.get_series(&Ident::new("pop")).unwrap(); + + // Run with override + let mut vm2 = Vm::new(compiled).unwrap(); + vm2.set_value(&Ident::new("birth_rate"), 0.5).unwrap(); + vm2.run_to_end().unwrap(); + let series2 = vm2.get_series(&Ident::new("pop")).unwrap(); + + // Higher birth_rate should produce higher final population + assert!( + series2.last().unwrap() > series1.last().unwrap(), + "higher birth_rate should produce higher final population: {} vs {}", + series2.last().unwrap(), + series1.last().unwrap() + ); + + // Verify birth_rate shows the overridden value + let br_series = vm2.get_series(&Ident::new("birth_rate")).unwrap(); + for (i, &val) in br_series.iter().enumerate() { + assert!( + (val - 0.5).abs() < 1e-10, + 
"birth_rate should be 0.5 at step {}, got {}", + i, + val + ); + } +} + +#[test] +fn test_override_does_not_corrupt_shared_literal() { + // Two constants with the same numeric value used to share an interned + // literal_id. Now they get distinct slots via push_named_literal. + // Overriding one must NOT affect the other. + let tp = TestProject::new("shared_literal") + .with_sim_time(0.0, 5.0, 1.0) + .aux("rate_a", "0.1", None) + .aux("rate_b", "0.1", None) + .aux("total_rate", "rate_a + rate_b", None) + .flow("inflow", "stock_val * total_rate", None) + .stock("stock_val", "100", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + // Override rate_a only, then run the full simulation + vm.set_value(&Ident::new("rate_a"), 0.5).unwrap(); + vm.run_to_end().unwrap(); + + let rate_a_series = vm.get_series(&Ident::new("rate_a")).unwrap(); + let rate_b_series = vm.get_series(&Ident::new("rate_b")).unwrap(); + + for (i, &val) in rate_a_series.iter().enumerate() { + assert!( + (val - 0.5).abs() < 1e-10, + "rate_a should be 0.5 at step {i}, got {val}" + ); + } + for (i, &val) in rate_b_series.iter().enumerate() { + assert!( + (val - 0.1).abs() < 1e-10, + "rate_b should remain 0.1 at step {i}, got {val} (must not be corrupted by rate_a override)" + ); + } +} + +#[test] +fn test_override_does_not_corrupt_expression_literal() { + // A constant and an expression both use the same numeric value 0.1. + // Overriding the constant must not corrupt the expression's literal. 
+ let tp = TestProject::new("expr_literal") + .with_sim_time(0.0, 5.0, 1.0) + .aux("rate", "0.1", None) + .aux("scaled", "stock_val * 0.1", None) + .flow("inflow", "stock_val * rate + scaled", None) + .stock("stock_val", "100", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + vm.set_value(&Ident::new("rate"), 0.9).unwrap(); + vm.run_to_end().unwrap(); + + let scaled_series = vm.get_series(&Ident::new("scaled")).unwrap(); + // At t=0, stock_val = 100, so scaled = 100 * 0.1 = 10.0 + // If the literal 0.1 was corrupted to 0.9, scaled would be 90.0 + assert!( + (scaled_series[0] - 10.0).abs() < 1e-10, + "scaled should be 10.0 at t=0 (the 0.1 literal in the expression must not be corrupted), got {}", + scaled_series[0] + ); +} + +#[test] +fn test_same_valued_constants_get_distinct_literal_ids() { + // Two constants with the same numeric value should get distinct literal + // slots in their AssignConstCurr opcodes (via push_named_literal). + let tp = TestProject::new("distinct_lits") + .with_sim_time(0.0, 1.0, 1.0) + .aux("rate_a", "0.1", None) + .aux("rate_b", "0.1", None) + .flow("inflow", "rate_a + rate_b", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let root_module = &compiled.modules[&compiled.root]; + + // Collect all AssignConstCurr literal_ids from the flows bytecode. + let assign_const_lits: Vec = root_module + .compiled_flows + .code + .iter() + .filter_map(|op| { + if let Opcode::AssignConstCurr { literal_id, .. } = op { + Some(*literal_id) + } else { + None + } + }) + .collect(); + + // rate_a and rate_b each get their own AssignConstCurr with distinct literal_ids. + assert!( + assign_const_lits.len() >= 2, + "expected at least 2 AssignConstCurr opcodes, got {}", + assign_const_lits.len() + ); + // All literal_ids should be unique (no sharing). 
+ let unique: std::collections::HashSet = assign_const_lits.iter().copied().collect(); + assert_eq!( + unique.len(), + assign_const_lits.len(), + "literal_ids should all be distinct, got {:?}", + assign_const_lits + ); +} + +#[test] +fn test_override_shared_literal_clear_restores_both() { + let tp = TestProject::new("shared_clear") + .with_sim_time(0.0, 5.0, 1.0) + .aux("rate_a", "0.1", None) + .aux("rate_b", "0.1", None) + .flow("inflow", "rate_a + rate_b", None) + .stock("s", "rate_a + rate_b", &["inflow"], &[], None); + + let compiled = tp.compile_incremental().unwrap(); + let mut vm = Vm::new(compiled).unwrap(); + + vm.set_value(&Ident::new("rate_a"), 0.5).unwrap(); + vm.run_to_end().unwrap(); + + let rate_a_series = vm.get_series(&Ident::new("rate_a")).unwrap(); + let rate_b_series = vm.get_series(&Ident::new("rate_b")).unwrap(); + assert!( + (rate_a_series[0] - 0.5).abs() < 1e-10, + "rate_a should be 0.5" + ); + assert!( + (rate_b_series[0] - 0.1).abs() < 1e-10, + "rate_b should be 0.1" + ); + + // Clear and re-run + vm.clear_values(); + vm.reset(); + vm.run_to_end().unwrap(); + + let rate_a_restored = vm.get_series(&Ident::new("rate_a")).unwrap(); + let rate_b_restored = vm.get_series(&Ident::new("rate_b")).unwrap(); + assert!( + (rate_a_restored[0] - 0.1).abs() < 1e-10, + "rate_a should be restored to 0.1, got {}", + rate_a_restored[0] + ); + assert!( + (rate_b_restored[0] - 0.1).abs() < 1e-10, + "rate_b should still be 0.1, got {}", + rate_b_restored[0] + ); +}