diff --git a/src/simlin-engine/CLAUDE.md b/src/simlin-engine/CLAUDE.md index ce60d3e82..2a9a305a5 100644 --- a/src/simlin-engine/CLAUDE.md +++ b/src/simlin-engine/CLAUDE.md @@ -97,7 +97,7 @@ The unit subsystem is partial-result throughout: a single bad declaration or one - **`src/units.rs`** - Unit parsing and `UnitMap` representation. `Context::new`/`new_with_builtins` return `(Context, Vec<(unit_name, errors)>)`: the context always holds every *valid* declaration, with conflicting/duplicate ones reported alongside -- never an empty context (which would lose project-wide alias normalization like yr/year and re-create a spurious mismatch flood). - **`src/units_check.rs`** - Dimensional consistency checking across equations. A reference's units are `declared OR inferred`; unknown units are skipped (not an error). RANK is dimensionless (an ordinal index, not the ranked array's units). -- **`src/units_infer.rs`** - Hindley-Milner-style unit inference: constraint generation (`gen_constraints`, total -- returns `Units`, no fallible `Result`) + unification over the free abelian group of units (`unify`/`solve_for`/`substitute`). `infer` returns `InferenceResult { resolved, conflicts }` and keeps solving past a conflict (keeping the first binding -- a contradiction is confined to its connected component, since substitution only flows along shared metavariables), collecting *every* residual contradiction via `find_constraint_mismatches`. A macro body's declared units may name the formal parameters (a polymorphic Vensim idiom, e.g. `~ xfrom` inside RAMP FROM TO), so inference skips declared-units constraints for `is_macro` models (`ModelStage0`/`ModelStage1`) and lets those units be inferred from the equation + cross-module parameter bindings (mirroring `check_model_units`, which skips macro models); without this, keeping the resolved map re-floods C-LEARN with the `xfrom`/`xto` leak. +- **`src/units_infer.rs`** - Hindley-Milner-style unit inference: constraint generation (`gen_constraints`, total -- returns `Units`, no fallible `Result`) + unification over the free abelian group of units (`unify`/`solve_for`/`substitute`). `infer` returns `InferenceResult { resolved, conflicts }` and keeps solving past a conflict (keeping the first binding -- a contradiction is confined to its connected component, since substitution only flows along shared metavariables), collecting *every* residual contradiction via `find_constraint_mismatches`. A macro body's declared units may name the formal parameters (a polymorphic Vensim idiom, e.g. `~ xfrom`, or a parameter ratio such as xfrom over tstart, inside RAMP FROM TO). `gen_all_constraints` LOWERS each parameter-named unit identifier to that parameter's per-instantiation metavariable (`lower_macro_unit_to_metavars`, gated on `ModelStage1::is_macro`/`macro_params`), so it resolves to the actual argument units at each instantiation instead of leaking the parameter name as a literal base unit -- while genuine base units (`dmnl`) are kept and still checked. This both *resolves* parameter-named units (GH #619 point 1) and *checks* a macro body's declared signature against its equations (point 2), superseding the GH #618 skip-entirely containment (which neither resolved nor checked them). A conflict that involves a synthetic module/macro instantiation is rewritten by `clarify_macro_conflict` into a plain-language diagnostic naming the function and the using variable (parsed from the `$⁚{var}⁚{n}⁚{func}` synthetic name by `synthetic_owner_and_func`) rather than synthetic-name/metavariable text -- end users are modelers, not software developers (this also cleans up stdlib-module conflict messages). The cross-module parameter bindings + per-instantiation prefix already monomorphize the macro body, so the RAMP FROM TO storm GH #618 contained does not return. ## Special features diff --git a/src/simlin-engine/src/db_units.rs b/src/simlin-engine/src/db_units.rs index 3da212466..ed96cfcb9 100644 --- a/src/simlin-engine/src/db_units.rs +++ b/src/simlin-engine/src/db_units.rs @@ -216,6 +216,7 @@ pub fn check_model_units(db: &dyn Db, model: SourceModel, project: SourceProject errors: None, implicit: is_stdlib, is_macro: src_model.macro_spec(db).is_some(), + macro_params: crate::model::macro_param_idents(src_model.macro_spec(db).as_ref()), } }; diff --git a/src/simlin-engine/src/db_var_fragment.rs b/src/simlin-engine/src/db_var_fragment.rs index 2c633fd83..13af069ce 100644 --- a/src/simlin-engine/src/db_var_fragment.rs +++ b/src/simlin-engine/src/db_var_fragment.rs @@ -705,6 +705,7 @@ pub(crate) fn lower_var_fragment( implicit: false, // Single-variable fragment compilation only; not a macro template. is_macro: false, + macro_params: vec![], }; let mut models: HashMap, &crate::model::ModelStage0> = HashMap::new(); diff --git a/src/simlin-engine/src/model.rs b/src/simlin-engine/src/model.rs index 6e285b2e7..ba61e7f67 100644 --- a/src/simlin-engine/src/model.rs +++ b/src/simlin-engine/src/model.rs @@ -35,6 +35,19 @@ pub type DependencyMap = HashMap, BTreeSet>>; pub type VariableStage0 = Variable; +/// Canonical formal-parameter names of a macro (empty for a non-macro model). +/// Unit inference lowers a macro body's parameter-named unit declarations +/// (`~ xfrom`) to the parameters' metavariables so they resolve to the actual +/// argument units at each instantiation, rather than leaking the parameter +/// name as a literal base unit (GH #619). +pub(crate) fn macro_param_idents( + macro_spec: Option<&datamodel::MacroSpec>, +) -> Vec> { + macro_spec + .map(|spec| spec.parameters.iter().map(|p| Ident::new(p)).collect()) + .unwrap_or_default() +} + /// ModelStage0 converts a datamodel::Model to one with a map of canonicalized /// identifiers to Variables where module dependencies haven't been resolved. #[cfg_attr(feature = "debug-derive", derive(Debug))] @@ -53,6 +66,11 @@ pub struct ModelStage0 { /// RAMP FROM TO), so unit inference must treat those as polymorphic rather /// than concrete base units. pub is_macro: bool, + /// Canonical formal-parameter names when `is_macro` is true (empty + /// otherwise). Lets unit inference recognize which identifiers in a macro + /// body's unit declarations are parameters and lower them to the + /// corresponding metavariables (GH #619). + pub macro_params: Vec>, } #[cfg_attr(feature = "debug-derive", derive(Debug))] @@ -83,6 +101,9 @@ pub struct ModelStage1 { /// `ModelStage0::is_macro`. Inference treats a macro body's declared units /// as polymorphic rather than concrete base units. pub is_macro: bool, + /// Canonical formal-parameter names when `is_macro` is true (empty + /// otherwise); see `ModelStage0::macro_params` (GH #619). + pub macro_params: Vec>, } #[cfg_attr(feature = "debug-derive", derive(Debug))] @@ -975,6 +996,7 @@ impl ModelStage0 { errors: None, implicit, is_macro: x_model.macro_spec.is_some(), + macro_params: macro_param_idents(x_model.macro_spec.as_ref()), } } @@ -1077,6 +1099,7 @@ impl ModelStage0 { errors: None, implicit, is_macro: x_model.macro_spec.is_some(), + macro_params: macro_param_idents(x_model.macro_spec.as_ref()), } } } @@ -1123,6 +1146,7 @@ impl ModelStage1 { instantiations: None, implicit: model_s0.implicit, is_macro: model_s0.is_macro, + macro_params: model_s0.macro_params.clone(), } } diff --git a/src/simlin-engine/src/project.rs b/src/simlin-engine/src/project.rs index ce899f4b9..66d7d1e14 100644 --- a/src/simlin-engine/src/project.rs +++ b/src/simlin-engine/src/project.rs @@ -160,6 +160,7 @@ impl Project { errors: None, implicit: is_stdlib, is_macro: src_model.macro_spec(db).is_some(), + macro_params: crate::model::macro_param_idents(src_model.macro_spec(db).as_ref()), }); } diff --git a/src/simlin-engine/src/units_infer.rs b/src/simlin-engine/src/units_infer.rs index bc1f2d732..4f18a8ae8 100644 --- a/src/simlin-engine/src/units_infer.rs +++ b/src/simlin-engine/src/units_infer.rs @@ -266,6 +266,33 @@ impl ConstraintSet { } } +/// Lower a macro body's declared units, replacing each unit identifier that +/// names a formal parameter with that parameter's metavariable +/// (`@{prefix}{param}`). A genuine base unit -- any name that is not a +/// parameter, e.g. `dmnl` -- is kept verbatim. This is what makes a macro's +/// unit signature polymorphic: a `~ xfrom` declaration becomes a constraint +/// tying the body variable to the metavariable the module's input binding +/// already ties to the actual argument, so it resolves to the argument's units +/// at this instantiation instead of leaking a bogus literal base unit `xfrom` +/// (GH #619). `prefix` is the instantiation prefix of the macro body (e.g. +/// `$⁚ramped⁚0⁚ramp_from_to·`), so the metavariable is per-instantiation. +fn lower_macro_unit_to_metavars( + units: &UnitMap, + params: &[Ident], + prefix: &str, +) -> UnitMap { + let mut lowered = UnitMap::new(); + for (name, exp) in units.map.iter() { + let canonical = canonicalize(name); + if params.iter().any(|p| p.as_str() == &*canonical) { + lowered.map.insert(format!("@{prefix}{canonical}"), *exp); + } else { + lowered.map.insert(name.clone(), *exp); + } + } + lowered +} + /// Splits a UnitMap into its metavariable part (signature) and concrete part (residual). /// This enables O(n) mismatch detection by grouping constraints with the same signature. fn split_constraint(u: &UnitMap) -> (UnitMap, UnitMap) { @@ -387,6 +414,82 @@ fn find_constraint_mismatches(constraints: &[LocatedConstraint]) -> Vec Option<(String, String)> { + for segment in name.split('\u{b7}') { + if let Some(rest) = segment.strip_prefix("$\u{205A}") { + let parts: Vec<&str> = rest.split('\u{205A}').collect(); + // parts == [var, n, func, (subscript?)] + if parts.len() >= 3 { + return Some((parts[0].to_string(), parts[2].to_string())); + } + } + } + None +} + +/// Rewrite an inference conflict that involves a synthetic module/macro +/// instantiation into a clear, user-facing diagnostic. The synthetic +/// instantiation variable names (`$⁚…·…`) and the `@`-metavariable constraint +/// text are meaningless to a modeler, so we collapse the synthetic sources to +/// the owning user variable and rebuild the message to name the function and +/// the variable using it (GH #619). A conflict with no synthetic source is an +/// ordinary user-level diagnostic and is left untouched. Distinct raw +/// conflicts that clarify to the same message dedupe in the caller, so a single +/// inconsistent macro yields one clear warning rather than one per internal +/// contradiction. +fn clarify_macro_conflict(error: UnitError) -> UnitError { + let UnitError::InferenceError { + code, + sources, + details, + } = error + else { + return error; + }; + + let owner_func = sources + .iter() + .find_map(|(var, _)| synthetic_owner_and_func(var)); + let Some((owner, func)) = owner_func else { + // No synthetic instantiation involved: already a user-level diagnostic. + return UnitError::InferenceError { + code, + sources, + details, + }; + }; + + // Collapse synthetic sources to the owning user variable, keeping any + // genuine (non-synthetic) user variables that were also involved so the + // modeler still sees which of their own variables participated. + let mut clean_sources: Vec<(String, Option)> = vec![(owner.clone(), None)]; + for (var, loc) in &sources { + if synthetic_owner_and_func(var).is_none() + && !clean_sources.iter().any(|(existing, _)| existing == var) + { + clean_sources.push((var.clone(), *loc)); + } + } + + UnitError::InferenceError { + code, + sources: clean_sources, + details: Some(format!( + "units in '{func}' (used by variable '{owner}') are inconsistent" + )), + } +} + impl UnitInferer<'_> { /// gen_constraints generates a set of equality constraints for a given expression, /// storing those constraints in the mutable `constraints` argument. This is @@ -838,23 +941,30 @@ impl UnitInferer<'_> { )); } } - // A macro is a polymorphic template: its body variables' declared - // units may name the macro's formal parameters (a Vensim idiom, - // e.g. `~ xfrom` inside RAMP FROM TO), which would otherwise leak - // the parameter name as a literal base unit into every - // instantiation and conflict with the real argument units. So we - // skip declared-units constraints for macro bodies and let those - // units be inferred polymorphically from the equation and the - // cross-module parameter bindings instead. (This mirrors - // `check_model_units`, which skips unit-checking macro models - // entirely.) - if !model.is_macro - && let Some(units) = var.units() - { + // Declared-units constraint. A macro is a polymorphic template: + // its body variables' declared units may name the macro's formal + // parameters (a Vensim idiom, e.g. `~ xfrom` inside RAMP FROM TO). + // Treating such a name as a literal base unit would leak the + // parameter name into every instantiation and collide with the + // real argument units, so for a macro body we lower each + // parameter-named unit to that parameter's metavariable -- the + // declared units then resolve to the actual argument units at this + // instantiation, AND a genuine declared-vs-equation inconsistency + // is still caught (genuine base units like `dmnl` are kept and + // checked). A non-macro model contributes its declared units + // verbatim. (GH #619; the earlier GH #618 fix skipped macro + // declarations entirely -- containing the leak but neither + // resolving nor checking them.) + if let Some(units) = var.units() { let mv: UnitMap = [(format!("@{prefix}{id}"), 1)].iter().cloned().collect(); + let declared = if model.is_macro { + lower_macro_unit_to_metavars(units, &model.macro_params, prefix) + } else { + units.clone() + }; // User-defined unit declarations don't have equation locations constraints.push(LocatedConstraint::new( - combine(UnitOp::Div, mv, units.clone()), + combine(UnitOp::Div, mv, declared), ¤t_var, None, )); @@ -935,6 +1045,7 @@ impl UnitInferer<'_> { // residual constraint) to report each once. let mut conflicts: Vec = Vec::new(); for conflict in find_constraint_mismatches(&leftover) { + let conflict = clarify_macro_conflict(conflict); if !conflicts.contains(&conflict) { conflicts.push(conflict); } @@ -1341,6 +1452,235 @@ fn test_macro_body_units_naming_parameters_are_polymorphic() { ); } +/// A macro-body variable whose units are pinned ONLY by a parameter-named +/// declaration (`~ amount`) -- not derivable from its own (constant) equation +/// -- must RESOLVE to the actual argument's units at each instantiation. This +/// is the "resolve, don't merely contain" half of GH #619: the GH #618 +/// containment skipped the declaration entirely, so the body variable (and the +/// macro result that reads it) were left unresolved. Lowering the parameter +/// name to the parameter's metavariable ties it to the actual argument. +#[test] +fn test_macro_param_named_units_resolve_to_actual_arg() { + let sim_specs = sim_specs_with_units("year"); + let units_ctx = Context::new_with_builtins(&[], &sim_specs).0; + + // macro carryunits(amount): + // carryunits = held (no declared units; equation only) + // held = 5 ~ amount (CONSTANT equation -> units come ONLY + // from the parameter-named declaration) + // amount = 0 (parameter placeholder) + let macro_model = crate::datamodel::Model { + name: "carryunits".to_string(), + sim_specs: None, + variables: vec![ + x_aux("carryunits", "held", None), + x_aux("held", "5", Some("amount")), + x_aux("amount", "0", None), + ], + views: vec![], + loop_metadata: vec![], + groups: vec![], + macro_spec: Some(crate::datamodel::MacroSpec { + parameters: vec!["amount".to_string()], + primary_output: "carryunits".to_string(), + additional_outputs: vec![], + }), + }; + let root = x_model( + "main", + vec![ + x_aux("source", "10", Some("widget")), + x_aux("scaled", "carryunits(source)", None), + ], + ); + let project_datamodel = x_project(sim_specs.clone(), &[root, macro_model]); + + let mut result = InferenceResult::default(); + let db = crate::db::SimlinDb::default(); + let sync = crate::db::sync_from_datamodel(&db, &project_datamodel); + let _project = crate::project::Project::from_salsa( + project_datamodel.clone(), + &db, + sync.project, + |models, units_ctx, model| { + if model.name.as_str() == "main" { + result = infer(models, units_ctx, model); + } + }, + ); + + assert!( + result.conflicts.is_empty(), + "no conflict expected for a consistent macro; got: {:?}", + result.conflicts + ); + let widget: UnitMap = crate::units::parse_units(&units_ctx, Some("widget")) + .unwrap() + .unwrap(); + assert_eq!( + result.resolved.get(&*canonicalize("scaled")), + Some(&widget), + "a parameter-named macro-body unit must resolve to the actual argument's units" + ); +} + +/// When a macro's declared units are internally inconsistent with its +/// equations, the conflict must be reported with a CLEAR, user-facing message +/// that names the macro and the variable using it -- NOT the synthetic +/// instantiation names (`$⁚...`) or raw `@` unit metavariables (GH #619). End +/// users are modelers, not software developers, so the diagnostic has to be +/// comprehensible. +#[test] +fn test_inconsistent_macro_reports_clear_user_facing_conflict() { + let sim_specs = sim_specs_with_units("year"); + + // macro squareit(amount): + // squareit = amount * amount ~ amount (equation units = amount^2, but + // the signature declares `amount`) + // amount = 0 + let macro_model = crate::datamodel::Model { + name: "squareit".to_string(), + sim_specs: None, + variables: vec![ + x_aux("squareit", "amount * amount", Some("amount")), + x_aux("amount", "0", None), + ], + views: vec![], + loop_metadata: vec![], + groups: vec![], + macro_spec: Some(crate::datamodel::MacroSpec { + parameters: vec!["amount".to_string()], + primary_output: "squareit".to_string(), + additional_outputs: vec![], + }), + }; + let root = x_model( + "main", + vec![ + x_aux("source", "10", Some("widget")), + x_aux("result", "squareit(source)", None), + ], + ); + let project_datamodel = x_project(sim_specs.clone(), &[root, macro_model]); + + let mut result = InferenceResult::default(); + let db = crate::db::SimlinDb::default(); + let sync = crate::db::sync_from_datamodel(&db, &project_datamodel); + let _project = crate::project::Project::from_salsa( + project_datamodel.clone(), + &db, + sync.project, + |models, units_ctx, model| { + if model.name.as_str() == "main" { + result = infer(models, units_ctx, model); + } + }, + ); + + assert!( + !result.conflicts.is_empty(), + "an internally-inconsistent macro must produce a conflict" + ); + let rendered = result + .conflicts + .iter() + .map(|c| format!("{c}")) + .collect::>() + .join("\n"); + assert!( + !rendered.contains('\u{205A}'), + "diagnostic must not expose synthetic instantiation names (got: {rendered})" + ); + assert!( + !rendered.contains('@'), + "diagnostic must not expose raw unit metavariables (got: {rendered})" + ); + assert!( + rendered.contains("squareit"), + "diagnostic should name the macro (got: {rendered})" + ); + assert!( + rendered.contains("result"), + "diagnostic should name the variable that uses the macro (got: {rendered})" + ); +} + +/// A macro that mixes parameter-named units (`~ xfrom`, `~ xfrom/tstart`) with +/// genuine base units (`~ dmnl`), instantiated with concrete arguments, must +/// infer cleanly with NO conflicts: the parameter names lower to the argument +/// metavariables (so there is no `xfrom`-as-a-literal-base-unit storm) while +/// genuine base units are still honored. This mirrors C-LEARN's RAMP FROM TO +/// and guards against re-introducing the leak GH #618 contained. +#[test] +fn test_macro_mixed_param_and_base_units_infer_cleanly() { + let sim_specs = sim_specs_with_units("year"); + let units_ctx = Context::new_with_builtins(&[], &sim_specs).0; + + // macro rampish(xfrom, tstart): + // rampish = xfrom + slope * tstart ~ xfrom (output) + // slope = xfrom / tstart ~ xfrom/tstart (two params) + // flag = 1 ~ dmnl (genuine base unit) + // xfrom = 0 + // tstart = 0 + let macro_model = crate::datamodel::Model { + name: "rampish".to_string(), + sim_specs: None, + variables: vec![ + x_aux("rampish", "xfrom + slope * tstart", Some("xfrom")), + x_aux("slope", "xfrom / tstart", Some("xfrom/tstart")), + x_aux("flag", "1", Some("dmnl")), + x_aux("xfrom", "0", None), + x_aux("tstart", "0", None), + ], + views: vec![], + loop_metadata: vec![], + groups: vec![], + macro_spec: Some(crate::datamodel::MacroSpec { + parameters: vec!["xfrom".to_string(), "tstart".to_string()], + primary_output: "rampish".to_string(), + additional_outputs: vec![], + }), + }; + // `dur` carries the time units (year); `amt` is an arbitrary base unit. + let root = x_model( + "main", + vec![ + x_aux("amt", "10", Some("widget")), + x_aux("dur", "3", Some("year")), + x_aux("ramped", "rampish(amt, dur)", None), + ], + ); + let project_datamodel = x_project(sim_specs.clone(), &[root, macro_model]); + + let mut result = InferenceResult::default(); + let db = crate::db::SimlinDb::default(); + let sync = crate::db::sync_from_datamodel(&db, &project_datamodel); + let _project = crate::project::Project::from_salsa( + project_datamodel.clone(), + &db, + sync.project, + |models, units_ctx, model| { + if model.name.as_str() == "main" { + result = infer(models, units_ctx, model); + } + }, + ); + + assert!( + result.conflicts.is_empty(), + "a macro mixing parameter-named and genuine base units must infer cleanly; got: {:?}", + result.conflicts + ); + let widget: UnitMap = crate::units::parse_units(&units_ctx, Some("widget")) + .unwrap() + .unwrap(); + assert_eq!( + result.resolved.get(&*canonicalize("ramped")), + Some(&widget), + "the macro result (declared ~ xfrom) should resolve to the first argument's units" + ); +} + pub(crate) fn infer( models: &HashMap, &ModelStage1>, units_ctx: &Context,