diff --git a/src/simlin-engine/src/db.rs b/src/simlin-engine/src/db.rs index 6dd0829d9..b48e22c5a 100644 --- a/src/simlin-engine/src/db.rs +++ b/src/simlin-engine/src/db.rs @@ -1256,379 +1256,6 @@ pub fn model_dependency_graph( // ── Diagnostic collection ────────────────────────────────────────────── -/// Collect the identifiers that must share units because they sit in the -/// "value branches" of an `if isModuleInput(x) then x else y` conditional. -/// -/// Every stdlib delay/smooth module's stock-init equation selects between -/// a caller-supplied `initial_value` and the module's `input`; the stdlib -/// marks this choice with an `isModuleInput(initial_value)` predicate, so -/// `initial_value` (then-branch) and `input` (else-branch) are the pair -/// whose units must agree. Other identifiers that appear elsewhere in -/// the init AST -- notably `delay_time` in `delay1`/`delay3`, which is -/// multiplied against the value-branch result -- are *coefficients*, not -/// value-equivalents, and their units legitimately differ. Grabbing every -/// identifier (as an earlier version of this code did via `identifier_set` -/// and then `collect_idents`) conflates these roles. -/// -/// We walk the AST looking for any `If(App(IsModuleInput(_)), t, f)` subtree -/// and, on the first match, record *only the bare `Var` identifiers* that -/// appear directly as `t` or `f`. An identifier embedded in arithmetic -- -/// for example `trend`'s then-branch `input / (1 + delay_time * -/// initial_value)` -- is playing a coefficient or rate role, not a -/// value-equivalence role, and must NOT be collapsed into the equivalence -/// group. If the matched branches are both non-bare (or no `isModuleInput` -/// subtree exists), we return an empty set and the pairwise-compatibility -/// check in `check_model_units` skips this stock entirely. That is the -/// correct conservative behaviour: without a structural value-swap marker -/// we have no basis for unit equivalence. -fn init_value_equivalence_group( - ast: &crate::ast::Ast, -) -> HashSet> { - use crate::ast::{Ast, Expr2}; - use crate::builtins::BuiltinFn; - - /// If `expr` is a bare `Var`, insert its identifier into `out`. A bare - /// reference directly under the if-then-else is the stdlib's signal - /// that a module-input slot is interchangeable with its sibling - /// branch's bare reference; anything wrapped in arithmetic (or a - /// builtin call, subscript, nested conditional, etc.) means the - /// identifier is playing a different role and should be left out of - /// the equivalence group. - fn try_insert_bare_var(expr: &Expr2, out: &mut HashSet>) { - if let Expr2::Var(id, _, _) = expr { - out.insert(id.clone()); - } - } - - /// Walk the AST looking for an `If(App(IsModuleInput(_)), t, f, ..)` - /// subtree; on the first match record the bare-Var idents (if any) - /// from both branches and return. We match only the first such - /// subtree -- stdlib modules use this pattern at most once per init - /// equation, and a second isModuleInput inside a branch would - /// indicate a different constraint we do not want to collapse. - fn find_value_branches(expr: &Expr2, out: &mut HashSet>) -> bool { - match expr { - Expr2::If(cond, t, f, _, _) => { - if let Expr2::App(BuiltinFn::IsModuleInput(_, _), _, _) = cond.as_ref() { - try_insert_bare_var(t, out); - try_insert_bare_var(f, out); - return true; - } - find_value_branches(cond, out) - || find_value_branches(t, out) - || find_value_branches(f, out) - } - Expr2::Op2(_, l, r, _, _) => find_value_branches(l, out) || find_value_branches(r, out), - Expr2::Op1(_, e, _, _) => find_value_branches(e, out), - Expr2::App(builtin, _, _) => { - use crate::builtins::{BuiltinContents, walk_builtin_expr}; - let mut found = false; - walk_builtin_expr(builtin, |c| { - if let BuiltinContents::Expr(inner) | BuiltinContents::LookupTable(inner) = c - && !found - { - found = find_value_branches(inner, &mut *out); - } - }); - found - } - Expr2::Subscript(_, args, _, _) => { - for arg in args { - if let crate::ast::IndexExpr2::Expr(e) = arg - && find_value_branches(e, out) - { - return true; - } - } - false - } - Expr2::Const(_, _, _) | Expr2::Var(_, _, _) => false, - } - } - - let mut out = HashSet::new(); - match ast { - Ast::Scalar(expr) => { - find_value_branches(expr, &mut out); - } - Ast::ApplyToAll(_, expr) => { - find_value_branches(expr, &mut out); - } - Ast::Arrayed(_, elements, default_expr, _) => { - for expr in elements.values() { - find_value_branches(expr, &mut out); - } - if let Some(default_expr) = default_expr { - find_value_branches(default_expr, &mut out); - } - } - } - out -} - -/// Per-model tracked function that performs unit inference and checking, -/// accumulating unit warnings/errors through the salsa accumulator. -/// -/// Builds temporary ModelStage0/ModelStage1 representations from the -/// salsa-cached parsed variables, then runs the same unit inference and -/// checking pipeline as the old `run_default_model_checks` callback. -/// Unit mismatches are accumulated as DiagnosticSeverity::Warning to -/// match the old-path behavior where unit issues don't block simulation. -/// -/// Stdlib (implicit) models are skipped because they are generic -/// templates that only make sense when instantiated with specific inputs. -#[salsa::tracked] -pub fn check_model_units(db: &dyn Db, model: SourceModel, project: SourceProject) { - use crate::common::{Canonical, ErrorCode, ErrorKind, Ident}; - use crate::dimensions::DimensionsContext; - use crate::model::{ModelStage0, ModelStage1, ScopeStage0, VariableStage0}; - - // Skip stdlib models -- they are generic and unit checking doesn't - // apply until instantiated with concrete inputs. Stdlib model names - // start with the "stdlib\u{205A}" prefix (two-dot punctuation separator). - if model.name(db).starts_with("stdlib\u{205A}") { - return; - } - - let model_name = model.name(db).clone(); - let units_ctx = project_units_context(db, project); - let dm_dims = project_datamodel_dims(db, project); - let dim_context = DimensionsContext::from(dm_dims.as_slice()); - - // Helper: build a ModelStage0 from a SourceModel's parsed variables. - let build_model_s0 = |src_model: &SourceModel, is_stdlib: bool| -> ModelStage0 { - let src_vars = src_model.variables(db); - let module_ctx = model_module_ident_context(db, *src_model, project, vec![]); - let mut var_list: Vec = Vec::new(); - let mut implicit_dm: Vec = Vec::new(); - for (_name, svar) in src_vars.iter() { - let parsed = parse_source_variable_with_module_context(db, *svar, project, module_ctx); - var_list.push(parsed.variable.clone()); - implicit_dm.extend(parsed.implicit_vars.iter().cloned()); - } - // Parse implicit vars (SMOOTH/DELAY expansion). - let mut dummy: Vec = Vec::new(); - var_list.extend(implicit_dm.into_iter().map(|dm_var| { - crate::variable::parse_var(dm_dims, &dm_var, &mut dummy, units_ctx, |mi| { - Ok(Some(mi.clone())) - }) - })); - let variables: HashMap, VariableStage0> = var_list - .into_iter() - .map(|v| (Ident::new(v.ident()), v)) - .collect(); - ModelStage0 { - ident: Ident::new(src_model.name(db)), - display_name: src_model.name(db).clone(), - variables, - errors: None, - implicit: is_stdlib, - } - }; - - // Build ModelStage0 for all project models so that cross-module unit - // inference constraints (module inputs/outputs) can resolve submodel - // variable types. Stdlib models are included in the map because user - // models may reference them as modules. - let project_models = project.models(db); - let mut all_s0: Vec = Vec::new(); - for (name, src_model) in project_models.iter() { - let is_stdlib = name.starts_with("stdlib\u{205A}"); - all_s0.push(build_model_s0(src_model, is_stdlib)); - } - - let models_s0: HashMap, &ModelStage0> = - all_s0.iter().map(|m| (m.ident.clone(), m)).collect(); - - // Lower all ModelStage0 -> ModelStage1. - let all_s1: Vec = all_s0 - .iter() - .map(|ms0| { - let scope = ScopeStage0 { - models: &models_s0, - dimensions: &dim_context, - model_name: ms0.ident.as_str(), - }; - ModelStage1::new(&scope, ms0) - }) - .collect(); - - let models_s1: HashMap, &ModelStage1> = - all_s1.iter().map(|m| (m.name.clone(), m)).collect(); - - // Find the target model in the lowered map. - let target_ident = Ident::::new(&model_name); - let target_model = match models_s1.get(&target_ident) { - Some(m) => *m, - None => return, - }; - - // Check whether the model declares units on any variable. If not, - // skip surfacing inference errors (the model wasn't designed with - // dimensional analysis in mind). - let has_declared_units = target_model - .variables - .values() - .any(|var| var.units().is_some()); - - // Run unit inference. - let inferred_units = crate::units_infer::infer(&models_s1, units_ctx, target_model) - .unwrap_or_else(|err| { - if has_declared_units - && let crate::common::UnitError::InferenceError { code, .. } = &err - && *code == ErrorCode::UnitMismatch - { - CompilationDiagnostic(Diagnostic { - model: model_name.clone(), - variable: None, - error: DiagnosticError::Model(crate::common::Error { - kind: ErrorKind::Model, - code: *code, - details: Some(format!("{}", err)), - }), - severity: DiagnosticSeverity::Warning, - }) - .accumulate(db); - } - Default::default() - }); - - // Check stdlib module argument unit compatibility. - // - // The unit inference handles cross-module constraints recursively, but - // implicit module variables (from SMOOTH/DELAY expansion) may not be - // fully processed by the inference when the sub-model's internal - // constraints aren't yet resolved. We do an explicit check here: for - // each implicit Module variable in the target model, verify that - // arguments bound to the same internal variable have compatible units. - // - // For stdlib modules like SMTH1, the first argument (input) and third - // argument (initial_value) must have the same units because they both - // feed into the stock's init equation. We check this by looking up - // each argument's units (declared or inferred) and comparing. - if has_declared_units { - for (var_ident, var) in target_model.variables.iter() { - if let crate::variable::Variable::Module { - model_name: sub_model_name, - inputs, - .. - } = var - { - // Only check stdlib modules where we know the constraint structure - if !sub_model_name.as_str().starts_with("stdlib\u{205A}") { - continue; - } - let submodel = match models_s1.get(sub_model_name) { - Some(m) => m, - None => continue, - }; - // Find groups of inputs that must have compatible units. - // - // In smth1/smth3 the stock's init equation is - // `if isModuleInput(initial_value) then initial_value else input`, - // constraining `input` and `initial_value` (and nothing else) to - // share units. In delay1/delay3 the same conditional is - // multiplied by `delay_time`, which is a coefficient whose units - // are independent. We specifically extract the identifiers that - // sit in the value branches of the `if isModuleInput(...)` test - // (see `init_value_equivalence_group`); the simple textual - // `identifier_set` would also return `delay_time`, which is what - // produced the spurious delay3 mismatches in World3. - let stock_init_deps: Vec>> = submodel - .variables - .values() - .filter_map(|sv| { - if matches!(sv, crate::variable::Variable::Stock { .. }) { - sv.ast().map(init_value_equivalence_group) - } else { - None - } - }) - .collect(); - - for init_dep_set in &stock_init_deps { - // Collect (src_units, input) pairs for inputs that bind to - // variables in this stock's init dep set. - let mut group_units: Vec<(Ident, &crate::datamodel::UnitMap)> = - Vec::new(); - for input in inputs { - if !init_dep_set.contains(&input.dst) { - continue; - } - let src_units = target_model - .variables - .get(&input.src) - .and_then(|v| v.units()) - .or_else(|| inferred_units.get(&input.src)); - if let Some(units) = src_units { - group_units.push((input.src.clone(), units)); - } - } - // Check pairwise compatibility - if group_units.len() >= 2 { - let (first_src, first_units) = &group_units[0]; - for (other_src, other_units) in &group_units[1..] { - if first_units != other_units { - CompilationDiagnostic(Diagnostic { - model: model_name.clone(), - variable: Some(var_ident.to_string()), - error: DiagnosticError::Unit( - crate::common::UnitError::ConsistencyError( - ErrorCode::UnitMismatch, - crate::builtins::Loc::default(), - Some(format!( - "module '{}': argument '{}' has units '{}' \ - but argument '{}' has units '{}' \ - (both feed the same internal variable)", - var_ident, - first_src, - first_units, - other_src, - other_units, - )), - ), - ), - severity: DiagnosticSeverity::Warning, - }) - .accumulate(db); - } - } - } - } - } - } - } - - // Run unit checking. - match crate::units_check::check(units_ctx, &inferred_units, target_model) { - Ok(Ok(())) => {} - Ok(Err(errors)) => { - for (ident, err) in errors.into_iter() { - CompilationDiagnostic(Diagnostic { - model: model_name.clone(), - variable: Some(ident.to_string()), - error: DiagnosticError::Unit(err), - severity: DiagnosticSeverity::Warning, - }) - .accumulate(db); - } - } - Err(err) => { - CompilationDiagnostic(Diagnostic { - model: model_name.clone(), - variable: None, - error: DiagnosticError::Model(crate::common::Error { - kind: ErrorKind::Model, - code: ErrorCode::Generic, - details: Some(format!("unit checking failed: {}", err)), - }), - severity: DiagnosticSeverity::Warning, - }) - .accumulate(db); - } - } -} - /// Per-model tracked function that triggers diagnostic accumulation from /// all compilation stages. The salsa accumulator is the sole error source /// for diagnostic reporting -- this function does not read struct fields. @@ -1668,8 +1295,10 @@ pub fn model_all_diagnostics(db: &dyn Db, model: SourceModel, project: SourcePro // Trigger unit checking. This is a separate tracked function so // that unit inference results are individually cached and - // invalidated only when unit-relevant inputs change. - check_model_units(db, model, project); + // invalidated only when unit-relevant inputs change. It lives in the + // sibling `db_units` module (kept out of `db.rs` for the per-file line + // cap). + crate::db_units::check_model_units(db, model, project); // When LTM is enabled, also trigger the LTM diagnostic pass so that // diagnostics accumulated by the LTM pipeline surface through diff --git a/src/simlin-engine/src/db_units.rs b/src/simlin-engine/src/db_units.rs new file mode 100644 index 000000000..05745e277 --- /dev/null +++ b/src/simlin-engine/src/db_units.rs @@ -0,0 +1,422 @@ +// Copyright 2026 The Simlin Authors. All rights reserved. +// Use of this source code is governed by the Apache License, +// Version 2.0, that can be found in the LICENSE file. + +// pattern: Imperative Shell +// +// Salsa-tracked unit-checking orchestration: it reads salsa inputs, rebuilds +// the temporary ModelStage0/ModelStage1 representations, runs the pure unit +// inference (`units_infer`) and consistency checking (`units_check`) cores, +// and accumulates the resulting diagnostics. The dimensional-analysis logic +// itself is the Functional Core in `units.rs`/`units_infer.rs`/`units_check.rs`; +// this module only wires it into the salsa graph. + +//! Per-model unit inference and checking as a salsa-tracked query. +//! +//! `check_model_units` is the single salsa-tracked entry point that runs unit +//! inference + consistency checking for one model and accumulates unit +//! warnings. It is invoked by `db::model_all_diagnostics`. +//! +//! Stdlib and macro-marked models are skipped: both are generic templates +//! whose formal parameters are unitless, so checking them in isolation only +//! produces noise; their unit correctness is validated at each instantiation +//! through the cross-module constraints `units_infer` generates. +//! +//! This is a top-level module (a sibling of `db`, like `db_macro_registry` +//! and `db_dep_graph`) rather than a submodule of `db.rs` purely to keep +//! `db.rs` under the per-file line cap (`scripts/lint-project.sh` rule 2); +//! `db::model_all_diagnostics` reaches it via `crate::db_units::...`. + +use std::collections::{HashMap, HashSet}; + +use salsa::Accumulator; + +use crate::common::{Canonical, Ident}; +use crate::datamodel; +use crate::db::{ + CompilationDiagnostic, Db, Diagnostic, DiagnosticError, DiagnosticSeverity, SourceModel, + SourceProject, model_module_ident_context, parse_source_variable_with_module_context, + project_datamodel_dims, project_units_context, +}; + +/// Collect the identifiers that must share units because they sit in the +/// "value branches" of an `if isModuleInput(x) then x else y` conditional. +/// +/// Every stdlib delay/smooth module's stock-init equation selects between +/// a caller-supplied `initial_value` and the module's `input`; the stdlib +/// marks this choice with an `isModuleInput(initial_value)` predicate, so +/// `initial_value` (then-branch) and `input` (else-branch) are the pair +/// whose units must agree. Other identifiers that appear elsewhere in +/// the init AST -- notably `delay_time` in `delay1`/`delay3`, which is +/// multiplied against the value-branch result -- are *coefficients*, not +/// value-equivalents, and their units legitimately differ. Grabbing every +/// identifier (as an earlier version of this code did via `identifier_set` +/// and then `collect_idents`) conflates these roles. +/// +/// We walk the AST looking for any `If(App(IsModuleInput(_)), t, f)` subtree +/// and, on the first match, record *only the bare `Var` identifiers* that +/// appear directly as `t` or `f`. An identifier embedded in arithmetic -- +/// for example `trend`'s then-branch `input / (1 + delay_time * +/// initial_value)` -- is playing a coefficient or rate role, not a +/// value-equivalence role, and must NOT be collapsed into the equivalence +/// group. If the matched branches are both non-bare (or no `isModuleInput` +/// subtree exists), we return an empty set and the pairwise-compatibility +/// check in `check_model_units` skips this stock entirely. That is the +/// correct conservative behaviour: without a structural value-swap marker +/// we have no basis for unit equivalence. +fn init_value_equivalence_group( + ast: &crate::ast::Ast, +) -> HashSet> { + use crate::ast::{Ast, Expr2}; + use crate::builtins::BuiltinFn; + + /// If `expr` is a bare `Var`, insert its identifier into `out`. A bare + /// reference directly under the if-then-else is the stdlib's signal + /// that a module-input slot is interchangeable with its sibling + /// branch's bare reference; anything wrapped in arithmetic (or a + /// builtin call, subscript, nested conditional, etc.) means the + /// identifier is playing a different role and should be left out of + /// the equivalence group. + fn try_insert_bare_var(expr: &Expr2, out: &mut HashSet>) { + if let Expr2::Var(id, _, _) = expr { + out.insert(id.clone()); + } + } + + /// Walk the AST looking for an `If(App(IsModuleInput(_)), t, f, ..)` + /// subtree; on the first match record the bare-Var idents (if any) + /// from both branches and return. We match only the first such + /// subtree -- stdlib modules use this pattern at most once per init + /// equation, and a second isModuleInput inside a branch would + /// indicate a different constraint we do not want to collapse. + fn find_value_branches(expr: &Expr2, out: &mut HashSet>) -> bool { + match expr { + Expr2::If(cond, t, f, _, _) => { + if let Expr2::App(BuiltinFn::IsModuleInput(_, _), _, _) = cond.as_ref() { + try_insert_bare_var(t, out); + try_insert_bare_var(f, out); + return true; + } + find_value_branches(cond, out) + || find_value_branches(t, out) + || find_value_branches(f, out) + } + Expr2::Op2(_, l, r, _, _) => find_value_branches(l, out) || find_value_branches(r, out), + Expr2::Op1(_, e, _, _) => find_value_branches(e, out), + Expr2::App(builtin, _, _) => { + use crate::builtins::{BuiltinContents, walk_builtin_expr}; + let mut found = false; + walk_builtin_expr(builtin, |c| { + if let BuiltinContents::Expr(inner) | BuiltinContents::LookupTable(inner) = c + && !found + { + found = find_value_branches(inner, &mut *out); + } + }); + found + } + Expr2::Subscript(_, args, _, _) => { + for arg in args { + if let crate::ast::IndexExpr2::Expr(e) = arg + && find_value_branches(e, out) + { + return true; + } + } + false + } + Expr2::Const(_, _, _) | Expr2::Var(_, _, _) => false, + } + } + + let mut out = HashSet::new(); + match ast { + Ast::Scalar(expr) => { + find_value_branches(expr, &mut out); + } + Ast::ApplyToAll(_, expr) => { + find_value_branches(expr, &mut out); + } + Ast::Arrayed(_, elements, default_expr, _) => { + for expr in elements.values() { + find_value_branches(expr, &mut out); + } + if let Some(default_expr) = default_expr { + find_value_branches(default_expr, &mut out); + } + } + } + out +} + +/// Per-model tracked function that performs unit inference and checking, +/// accumulating unit warnings/errors through the salsa accumulator. +/// +/// Builds temporary ModelStage0/ModelStage1 representations from the +/// salsa-cached parsed variables, then runs the same unit inference and +/// checking pipeline as the old `run_default_model_checks` callback. +/// Unit mismatches are accumulated as DiagnosticSeverity::Warning to +/// match the old-path behavior where unit issues don't block simulation. +/// +/// Stdlib (implicit) models are skipped because they are generic +/// templates that only make sense when instantiated with specific inputs. +#[salsa::tracked] +pub fn check_model_units(db: &dyn Db, model: SourceModel, project: SourceProject) { + use crate::common::{ErrorCode, ErrorKind}; + use crate::dimensions::DimensionsContext; + use crate::model::{ModelStage0, ModelStage1, ScopeStage0, VariableStage0}; + + // Skip stdlib models -- they are generic and unit checking doesn't + // apply until instantiated with concrete inputs. Stdlib model names + // start with the "stdlib\u{205A}" prefix (two-dot punctuation separator). + if model.name(db).starts_with("stdlib\u{205A}") { + return; + } + + // Skip macro-marked models for the same reason: a macro is a generic + // template whose formal parameters are unitless, so unit-checking its body + // in isolation only produces spurious errors (e.g. C-LEARN's + // `ramp_from_to`/`sshape`). Macro correctness is validated at each + // instantiation through the cross-module unit constraints in `units_infer`. + if model.macro_spec(db).is_some() { + return; + } + + let model_name = model.name(db).clone(); + let units_ctx = project_units_context(db, project); + let dm_dims = project_datamodel_dims(db, project); + let dim_context = DimensionsContext::from(dm_dims.as_slice()); + + // Helper: build a ModelStage0 from a SourceModel's parsed variables. + let build_model_s0 = |src_model: &SourceModel, is_stdlib: bool| -> ModelStage0 { + let src_vars = src_model.variables(db); + let module_ctx = model_module_ident_context(db, *src_model, project, vec![]); + let mut var_list: Vec = Vec::new(); + let mut implicit_dm: Vec = Vec::new(); + for (_name, svar) in src_vars.iter() { + let parsed = parse_source_variable_with_module_context(db, *svar, project, module_ctx); + var_list.push(parsed.variable.clone()); + implicit_dm.extend(parsed.implicit_vars.iter().cloned()); + } + // Parse implicit vars (SMOOTH/DELAY expansion). + let mut dummy: Vec = Vec::new(); + var_list.extend(implicit_dm.into_iter().map(|dm_var| { + crate::variable::parse_var(dm_dims, &dm_var, &mut dummy, units_ctx, |mi| { + Ok(Some(mi.clone())) + }) + })); + let variables: HashMap, VariableStage0> = var_list + .into_iter() + .map(|v| (Ident::new(v.ident()), v)) + .collect(); + ModelStage0 { + ident: Ident::new(src_model.name(db)), + display_name: src_model.name(db).clone(), + variables, + errors: None, + implicit: is_stdlib, + } + }; + + // Build ModelStage0 for all project models so that cross-module unit + // inference constraints (module inputs/outputs) can resolve submodel + // variable types. Stdlib models are included in the map because user + // models may reference them as modules. + let project_models = project.models(db); + let mut all_s0: Vec = Vec::new(); + for (name, src_model) in project_models.iter() { + let is_stdlib = name.starts_with("stdlib\u{205A}"); + all_s0.push(build_model_s0(src_model, is_stdlib)); + } + + let models_s0: HashMap, &ModelStage0> = + all_s0.iter().map(|m| (m.ident.clone(), m)).collect(); + + // Lower all ModelStage0 -> ModelStage1. + let all_s1: Vec = all_s0 + .iter() + .map(|ms0| { + let scope = ScopeStage0 { + models: &models_s0, + dimensions: &dim_context, + model_name: ms0.ident.as_str(), + }; + ModelStage1::new(&scope, ms0) + }) + .collect(); + + let models_s1: HashMap, &ModelStage1> = + all_s1.iter().map(|m| (m.name.clone(), m)).collect(); + + // Find the target model in the lowered map. + let target_ident = Ident::::new(&model_name); + let target_model = match models_s1.get(&target_ident) { + Some(m) => *m, + None => return, + }; + + // Check whether the model declares units on any variable. If not, + // skip surfacing inference errors (the model wasn't designed with + // dimensional analysis in mind). + let has_declared_units = target_model + .variables + .values() + .any(|var| var.units().is_some()); + + // Run unit inference. + let inferred_units = crate::units_infer::infer(&models_s1, units_ctx, target_model) + .unwrap_or_else(|err| { + if has_declared_units + && let crate::common::UnitError::InferenceError { code, .. } = &err + && *code == ErrorCode::UnitMismatch + { + CompilationDiagnostic(Diagnostic { + model: model_name.clone(), + variable: None, + error: DiagnosticError::Model(crate::common::Error { + kind: ErrorKind::Model, + code: *code, + details: Some(format!("{}", err)), + }), + severity: DiagnosticSeverity::Warning, + }) + .accumulate(db); + } + Default::default() + }); + + // Check stdlib module argument unit compatibility. + // + // The unit inference handles cross-module constraints recursively, but + // implicit module variables (from SMOOTH/DELAY expansion) may not be + // fully processed by the inference when the sub-model's internal + // constraints aren't yet resolved. We do an explicit check here: for + // each implicit Module variable in the target model, verify that + // arguments bound to the same internal variable have compatible units. + // + // For stdlib modules like SMTH1, the first argument (input) and third + // argument (initial_value) must have the same units because they both + // feed into the stock's init equation. We check this by looking up + // each argument's units (declared or inferred) and comparing. + if has_declared_units { + for (var_ident, var) in target_model.variables.iter() { + if let crate::variable::Variable::Module { + model_name: sub_model_name, + inputs, + .. + } = var + { + // Only check stdlib modules where we know the constraint structure + if !sub_model_name.as_str().starts_with("stdlib\u{205A}") { + continue; + } + let submodel = match models_s1.get(sub_model_name) { + Some(m) => m, + None => continue, + }; + // Find groups of inputs that must have compatible units. + // + // In smth1/smth3 the stock's init equation is + // `if isModuleInput(initial_value) then initial_value else input`, + // constraining `input` and `initial_value` (and nothing else) to + // share units. In delay1/delay3 the same conditional is + // multiplied by `delay_time`, which is a coefficient whose units + // are independent. We specifically extract the identifiers that + // sit in the value branches of the `if isModuleInput(...)` test + // (see `init_value_equivalence_group`); the simple textual + // `identifier_set` would also return `delay_time`, which is what + // produced the spurious delay3 mismatches in World3. + let stock_init_deps: Vec>> = submodel + .variables + .values() + .filter_map(|sv| { + if matches!(sv, crate::variable::Variable::Stock { .. }) { + sv.ast().map(init_value_equivalence_group) + } else { + None + } + }) + .collect(); + + for init_dep_set in &stock_init_deps { + // Collect (src_units, input) pairs for inputs that bind to + // variables in this stock's init dep set. + let mut group_units: Vec<(Ident, &crate::datamodel::UnitMap)> = + Vec::new(); + for input in inputs { + if !init_dep_set.contains(&input.dst) { + continue; + } + let src_units = target_model + .variables + .get(&input.src) + .and_then(|v| v.units()) + .or_else(|| inferred_units.get(&input.src)); + if let Some(units) = src_units { + group_units.push((input.src.clone(), units)); + } + } + // Check pairwise compatibility + if group_units.len() >= 2 { + let (first_src, first_units) = &group_units[0]; + for (other_src, other_units) in &group_units[1..] { + if first_units != other_units { + CompilationDiagnostic(Diagnostic { + model: model_name.clone(), + variable: Some(var_ident.to_string()), + error: DiagnosticError::Unit( + crate::common::UnitError::ConsistencyError( + ErrorCode::UnitMismatch, + crate::builtins::Loc::default(), + Some(format!( + "module '{}': argument '{}' has units '{}' \ + but argument '{}' has units '{}' \ + (both feed the same internal variable)", + var_ident, + first_src, + first_units, + other_src, + other_units, + )), + ), + ), + severity: DiagnosticSeverity::Warning, + }) + .accumulate(db); + } + } + } + } + } + } + } + + // Run unit checking. + match crate::units_check::check(units_ctx, &inferred_units, target_model) { + Ok(Ok(())) => {} + Ok(Err(errors)) => { + for (ident, err) in errors.into_iter() { + CompilationDiagnostic(Diagnostic { + model: model_name.clone(), + variable: Some(ident.to_string()), + error: DiagnosticError::Unit(err), + severity: DiagnosticSeverity::Warning, + }) + .accumulate(db); + } + } + Err(err) => { + CompilationDiagnostic(Diagnostic { + model: model_name.clone(), + variable: None, + error: DiagnosticError::Model(crate::common::Error { + kind: ErrorKind::Model, + code: ErrorCode::Generic, + details: Some(format!("unit checking failed: {}", err)), + }), + severity: DiagnosticSeverity::Warning, + }) + .accumulate(db); + } + } +} diff --git a/src/simlin-engine/src/lib.rs b/src/simlin-engine/src/lib.rs index 54488e235..2d08e1187 100644 --- a/src/simlin-engine/src/lib.rs +++ b/src/simlin-engine/src/lib.rs @@ -49,6 +49,7 @@ mod db_macro_registry; // (`scripts/lint-project.sh` rule 2); `db.rs` reaches it via // `crate::db_dep_graph::...`, mirroring `db_ltm_ir` / `db_macro_registry`. mod db_dep_graph; +mod db_units; mod db_var_fragment; pub mod diagram; mod dimensions; diff --git a/src/simlin-engine/src/macro_expansion_tests.rs b/src/simlin-engine/src/macro_expansion_tests.rs index 9f24abf20..0adea4aac 100644 --- a/src/simlin-engine/src/macro_expansion_tests.rs +++ b/src/simlin-engine/src/macro_expansion_tests.rs @@ -1260,3 +1260,40 @@ sibling= ); } } + +// ── unit checking: macro-marked models are templates, like stdlib ────────── + +/// F4: a macro-marked model is a generic template -- its formal parameters are +/// unitless, so unit checking it in isolation produces only spurious errors. +/// Like a stdlib model, it must be SKIPPED by unit checking. Even a blatant +/// dimensional inconsistency in the macro body (here `meters + seconds`) must +/// not surface a diagnostic attributed to the macro model. (Macro correctness +/// is validated at instantiation via cross-module unit constraints; this is +/// the source of C-LEARN's spurious `ramp_from_to`/`sshape` unit warnings.) +#[test] +fn macro_body_units_are_not_checked() { + let source = mdl(r#":MACRO: BADUNITS(a, b) +BADUNITS = lhs + rhs + ~ widgets + ~ | +lhs = a + ~ meters + ~ | +rhs = b + ~ seconds + ~ | +:END OF MACRO: +y = + BADUNITS(3, 4) + ~ widgets + ~ | +"#); + + let diags = diagnostics_for(&source); + let macro_diags: Vec<_> = diags.iter().filter(|d| d.model == "badunits").collect(); + assert!( + macro_diags.is_empty(), + "unit checking must skip macro-marked models; diagnostics attributed \ + to the `badunits` macro model:\n{macro_diags:#?}", + ); +} diff --git a/src/simlin-engine/src/test_common.rs b/src/simlin-engine/src/test_common.rs index 4e12176d7..ddb7fd693 100644 --- a/src/simlin-engine/src/test_common.rs +++ b/src/simlin-engine/src/test_common.rs @@ -116,6 +116,18 @@ impl TestProject { self } + /// Add a unit definition with alias names (a Vensim `22:` equivalence + /// group). The first name is the primary; the rest are aliases. + pub fn unit_with_aliases(mut self, name: &str, aliases: &[&str]) -> Self { + self.units.push(datamodel::Unit { + name: name.to_string(), + equation: None, + disabled: false, + aliases: aliases.iter().map(|s| s.to_string()).collect(), + }); + self + } + /// Add an indexed dimension (e.g., for numeric indices) pub fn indexed_dimension(mut self, name: &str, size: u32) -> Self { self.dimensions diff --git a/src/simlin-engine/src/units.rs b/src/simlin-engine/src/units.rs index 13f376954..f458b458b 100644 --- a/src/simlin-engine/src/units.rs +++ b/src/simlin-engine/src/units.rs @@ -170,11 +170,12 @@ impl Context { // A primary name that is already an alias of *another* unit is a // genuine conflict; a primary name that's already itself a prime // unit is a benign re-declaration as long as the inferred unit - // map matches. - if let Some(target) = aliases.get(&unit_name) { - if target != &unit_name { - unit_errors.push(dup_err(&unit_name)); - } + // map matches. A name that appears among its OWN aliases (a + // self-alias -- e.g. `22:Yr,...,yr,...` where both `Yr` and `yr` + // canonicalize to `yr`) is also benign: we must still register the + // prime unit so that the name and its aliases resolve via lookup. + if matches!(aliases.get(&unit_name), Some(target) if target != &unit_name) { + unit_errors.push(dup_err(&unit_name)); } else { let new_map: UnitMap = [(unit_name.clone(), 1)].iter().cloned().collect(); match parsed_units.entry(unit_name.clone()) { @@ -241,10 +242,10 @@ impl Context { None => [(unit_name.clone(), 1)].iter().cloned().collect(), }; - if let Some(target) = ctx.aliases.get(&unit_name) { - if target != &unit_name { - unit_errors.push(dup_err(&unit_name)); - } + // As in step 1: only an alias of *another* unit is a conflict; a + // self-alias is benign and the prime unit must still be registered. + if matches!(ctx.aliases.get(&unit_name), Some(target) if target != &unit_name) { + unit_errors.push(dup_err(&unit_name)); } else { match ctx.units.entry(unit_name.clone()) { Entry::Vacant(e) => { @@ -444,6 +445,47 @@ pub fn parse_units( } } +#[test] +fn self_aliased_prime_unit_is_registered() { + // A Vensim `22:` equivalence group can list the canonical form of its own + // primary name among its aliases. C-LEARN declares + // `22:Yr,year,years,yr,Year,Years`: the primary `Yr` canonicalizes to `yr`, + // and `yr` ALSO appears in the alias list, so `yr` becomes an alias of + // itself. The prime unit must still be registered so that the primary + // name and every alias resolve to the same UnitMap. Before the fix the + // self-alias caused `Context::new` to skip registering the prime unit, so + // `lookup` returned None and callers fell back to whatever (un-normalized) + // name they queried with -- the source of the C-LEARN `year` vs `yr` + // mismatch flood. + let ctx = Context::new( + &[Unit { + name: "Yr".to_owned(), + equation: None, + disabled: false, + aliases: vec![ + "year".to_owned(), + "years".to_owned(), + "yr".to_owned(), + "Year".to_owned(), + "Years".to_owned(), + ], + }], + &Default::default(), + ) + .expect("a self-aliased unit group must not produce a DuplicateUnit error"); + + let yr = ctx.lookup("yr"); + let year = ctx.lookup("year"); + assert!( + yr.is_some(), + "lookup(\"yr\") must resolve the registered prime unit, got None" + ); + assert_eq!( + yr, year, + "\"yr\" and \"year\" must resolve to the same UnitMap" + ); +} + #[test] fn test_pretty_print_unit() { let context = Context::new( diff --git a/src/simlin-engine/src/units_check.rs b/src/simlin-engine/src/units_check.rs index 4a81e164a..dfc91d568 100644 --- a/src/simlin-engine/src/units_check.rs +++ b/src/simlin-engine/src/units_check.rs @@ -502,111 +502,50 @@ pub fn check( check_flows(inflows); check_flows(outflows); } + // Compare a sub-expression's computed units against the variable's + // declared (`expected`) units. Returns a mismatch error, or None + // when the units match, the expression is constant, or a + // dependency's units are unknown. Unknown units (DoesNotExist) are + // NOT a dimensional inconsistency -- they arise from module outputs + // or synthesized helpers that inference left unresolved -- so we + // skip the check, exactly as the arrayed-element consistency check + // above does (and as Vensim does: it warns that the dependency + // lacks units rather than erroring on every reader). + let check_against_expected = |expr: &Expr2| -> Option { + match units.check(expr) { + Ok(Units::Explicit(actual)) if actual != *expected => { + let loc = expr.get_loc(); + Some(ConsistencyError( + ErrorCode::UnitMismatch, + Loc::new(loc.start.into(), loc.end.into()), + Some(format!( + "computed units '{actual}' don't match specified units" + )), + )) + } + Ok(_) => None, + Err(ConsistencyError(ErrorCode::DoesNotExist, _, _)) => None, + Err(err) => Some(err), + } + }; + if let Some(ast) = var.ast() { match ast { - Ast::Scalar(expr) => match units.check(expr) { - Ok(Units::Explicit(actual)) => { - if actual != *expected { - let details = format!( - "computed units '{}' don't match specified units", - &actual, - ); - let loc = expr.get_loc(); - errors.push(( - ident.clone(), - ConsistencyError( - ErrorCode::UnitMismatch, - Loc::new(loc.start.into(), loc.end.into()), - Some(details), - ), - )) - } - } - Ok(Units::Constant) => { - // definitionally we're fine - } - Err(err) => { + Ast::Scalar(expr) | Ast::ApplyToAll(_, expr) => { + if let Some(err) = check_against_expected(expr) { errors.push((ident.clone(), err)); } - }, - Ast::ApplyToAll(_, expr) => match units.check(expr) { - Ok(Units::Explicit(actual)) => { - if actual != *expected { - let details = format!( - "computed units '{}' don't match specified units", - &actual, - ); - let loc = expr.get_loc(); - errors.push(( - ident.clone(), - ConsistencyError( - ErrorCode::UnitMismatch, - Loc::new(loc.start.into(), loc.end.into()), - Some(details), - ), - )) - } - } - Ok(Units::Constant) => { - // definitionally we're fine - } - Err(err) => { - errors.push((ident.clone(), err)); - } - }, + } Ast::Arrayed(_, asts, default_expr, _) => { - // Check each element expression in the arrayed variable for (_element, expr) in asts.iter() { - match units.check(expr) { - Ok(Units::Explicit(actual)) => { - if actual != *expected { - let details = format!( - "computed units '{}' don't match specified units", - &actual, - ); - let loc = expr.get_loc(); - errors.push(( - ident.clone(), - ConsistencyError( - ErrorCode::UnitMismatch, - Loc::new(loc.start.into(), loc.end.into()), - Some(details), - ), - )) - } - } - Ok(Units::Constant) => { - // definitionally we're fine - } - Err(err) => { - errors.push((ident.clone(), err)); - } + if let Some(err) = check_against_expected(expr) { + errors.push((ident.clone(), err)); } } - if let Some(default_expr) = default_expr { - match units.check(default_expr) { - Ok(Units::Explicit(actual)) => { - if actual != *expected { - let details = format!( - "computed units '{}' don't match specified units", - &actual, - ); - let loc = default_expr.get_loc(); - errors.push(( - ident.clone(), - ConsistencyError( - ErrorCode::UnitMismatch, - Loc::new(loc.start.into(), loc.end.into()), - Some(details), - ), - )) - } - } - Ok(Units::Constant) => {} - Err(err) => { - errors.push((ident.clone(), err)); - } - } + if let Some(default_expr) = default_expr + && let Some(err) = check_against_expected(default_expr) + { + errors.push((ident.clone(), err)); } } } diff --git a/src/simlin-engine/src/units_infer.rs b/src/simlin-engine/src/units_infer.rs index 9e6968261..4a21b69cc 100644 --- a/src/simlin-engine/src/units_infer.rs +++ b/src/simlin-engine/src/units_infer.rs @@ -625,8 +625,19 @@ impl UnitInferer<'_> { prefix: &str, constraints: &mut Vec, ) { - let time_units = + let time_units_name = canonicalize(self.ctx.sim_specs.time_units.as_deref().unwrap_or("time")).into_owned(); + // Resolve the time unit through the units Context's alias map so that + // inference uses the same canonical time unit as `units_check::check`. + // Without this, a model that declares some units with an aliased time + // name (e.g. `yr`) while `time_units` names the primary (`year`) + // produces a spurious `year` vs `yr` mismatch on every stock/flow + // constraint. + let time_units: UnitMap = self + .ctx + .lookup(&time_units_name) + .cloned() + .unwrap_or_else(|| [(time_units_name.clone(), 1)].iter().cloned().collect()); for (id, var) in model.variables.iter() { let current_var = format!("{prefix}{id}"); @@ -640,13 +651,15 @@ impl UnitInferer<'_> { { let stock_ident = ident; let stock_var = format!("{prefix}{stock_ident}"); - let expected = [ - (format!("@{prefix}{stock_ident}"), 1), - (time_units.clone(), -1), - ] - .iter() - .cloned() - .collect::(); + // expected = @stock / time_units (the units a flow must carry). + let expected = combine( + UnitOp::Div, + [(format!("@{prefix}{stock_ident}"), 1)] + .iter() + .cloned() + .collect::(), + time_units.clone(), + ); let mut check_flows = |flows: &Vec>| { for flow_ident in flows.iter() { let flow_var = format!("{prefix}{flow_ident}"); @@ -1136,8 +1149,15 @@ pub(crate) fn infer( units_ctx: &Context, model: &ModelStage1, ) -> UnitResult, UnitMap>> { - let time_units = + let time_units_name = canonicalize(units_ctx.sim_specs.time_units.as_deref().unwrap_or("time")).into_owned(); + // Resolve through the alias map so the synthetic `time` variable's units + // match what `units_check::check` uses (see the same resolution in + // `gen_all_constraints`). + let time_units: UnitMap = units_ctx + .lookup(&time_units_name) + .cloned() + .unwrap_or_else(|| [(time_units_name.clone(), 1)].iter().cloned().collect()); let units = UnitInferer { ctx: units_ctx, @@ -1147,7 +1167,7 @@ pub(crate) fn infer( ast: None, init_ast: None, eqn: None, - units: Some([(time_units, 1)].iter().cloned().collect()), + units: Some(time_units), tables: vec![], non_negative: false, is_flow: false, diff --git a/src/simlin-engine/tests/clearn_unit_errors.rs b/src/simlin-engine/tests/clearn_unit_errors.rs new file mode 100644 index 000000000..669691c1a --- /dev/null +++ b/src/simlin-engine/tests/clearn_unit_errors.rs @@ -0,0 +1,273 @@ +// Copyright 2026 The Simlin Authors. All rights reserved. +// Use of this source code is governed by the Apache License, +// Version 2.0, that can be found in the LICENSE file. + +//! Exploratory / diagnostic harness for the C-LEARN unit-error flood. +//! +//! Loads the C-LEARN v77 Vensim model, collects every accumulated +//! diagnostic, and buckets the unit-related ones by a normalized message +//! "template" so we can see WHAT kinds of unit errors Simlin emits and how +//! many of each. Run with: +//! +//! cargo test -p simlin-engine --test clearn_unit_errors -- --ignored --nocapture + +use std::collections::BTreeMap; + +use simlin_engine::common::{ErrorCode, UnitError}; +use simlin_engine::db::{ + Diagnostic, DiagnosticError, SimlinDb, collect_all_diagnostics, sync_from_datamodel_incremental, +}; +use simlin_engine::open_vensim; + +fn load_clearn() -> simlin_engine::datamodel::Project { + let mdl_content = + std::fs::read_to_string("../../test/xmutil_test_models/C-LEARN v77 for Vensim.mdl") + .expect("failed to read C-LEARN mdl -- run from the simlin-engine crate dir"); + open_vensim(&mdl_content).expect("open_vensim should parse C-LEARN without I/O errors") +} + +fn diag_details(d: &Diagnostic) -> String { + match &d.error { + DiagnosticError::Unit(UnitError::ConsistencyError(_, _, Some(s))) => s.clone(), + DiagnosticError::Unit(UnitError::InferenceError { + details: Some(s), .. + }) => s.clone(), + DiagnosticError::Unit(UnitError::DefinitionError(_, Some(s))) => s.clone(), + DiagnosticError::Unit(other) => format!("{:?}", other), + DiagnosticError::Model(e) => e.details.as_deref().unwrap_or("").to_string(), + _ => String::new(), + } +} + +/// Is this diagnostic unit-related (either a `Unit` variant or a model-level +/// `UnitMismatch`)? +fn is_unit_diag(d: &Diagnostic) -> bool { + matches!(&d.error, DiagnosticError::Unit(_)) + || matches!( + &d.error, + DiagnosticError::Model(e) if e.code == ErrorCode::UnitMismatch + ) +} + +/// Classify the diagnostic's error variant into a short tag. +fn variant_tag(d: &Diagnostic) -> &'static str { + match &d.error { + DiagnosticError::Unit(UnitError::ConsistencyError(..)) => "Unit::Consistency", + DiagnosticError::Unit(UnitError::InferenceError { .. }) => "Unit::Inference", + DiagnosticError::Unit(UnitError::DefinitionError(..)) => "Unit::Definition", + DiagnosticError::Model(_) => "Model", + DiagnosticError::Equation(_) => "Equation", + DiagnosticError::Assembly(_) => "Assembly", + } +} + +/// Collapse the per-variable noise in a message so we can group by template: +/// blank out single-quoted spans and replace digit runs with `#`. +/// +/// A `'` only opens a quoted span when it is NOT a contraction apostrophe -- +/// i.e. when the previous emitted character is not an ASCII letter/digit. +/// Otherwise `can't`/`don't` would flip quote parity and leak the per-variable +/// dependency name into the template, splitting one bucket into hundreds. +fn normalize_template(msg: &str) -> String { + let mut out = String::with_capacity(msg.len()); + let mut in_quote = false; + let mut prev_alnum = false; + let mut prev_digit = false; + for ch in msg.chars() { + if ch == '\'' { + if in_quote { + in_quote = false; + } else if prev_alnum { + // Contraction apostrophe -- emit literally. + out.push('\''); + } else { + out.push_str("'_'"); + in_quote = true; + } + prev_alnum = false; + prev_digit = false; + continue; + } + if in_quote { + continue; + } + if ch.is_ascii_digit() { + if !prev_digit { + out.push('#'); + } + prev_digit = true; + prev_alnum = true; + continue; + } + prev_digit = false; + prev_alnum = ch.is_ascii_alphanumeric(); + out.push(ch); + } + out +} + +#[test] +#[ignore = "loads the 1.4MB C-LEARN model; run explicitly with --ignored --nocapture"] +fn dump_clearn_unit_diagnostics() { + let project = load_clearn(); + let mut db = SimlinDb::default(); + let sync = sync_from_datamodel_incremental(&mut db, &project, None); + let sync_result = sync.to_sync_result(); + let diagnostics = collect_all_diagnostics(&db, &sync_result); + + let unit_diags: Vec<&Diagnostic> = diagnostics.iter().filter(|d| is_unit_diag(d)).collect(); + + println!("\n=== C-LEARN diagnostics summary ==="); + println!("total diagnostics: {}", diagnostics.len()); + println!("unit-related diagnostics: {}", unit_diags.len()); + + // Non-unit diagnostics (parse/equation/assembly) -- should be few. + let non_unit: Vec<&Diagnostic> = diagnostics.iter().filter(|d| !is_unit_diag(d)).collect(); + println!("non-unit diagnostics: {}", non_unit.len()); + + // Bucket unit diagnostics by (variant, normalized template). + let mut buckets: BTreeMap<(String, String), (usize, String, Vec)> = BTreeMap::new(); + for d in &unit_diags { + let tag = variant_tag(d).to_string(); + let details = diag_details(d); + let template = normalize_template(&details); + let entry = buckets + .entry((tag, template)) + .or_insert_with(|| (0, details.clone(), Vec::new())); + entry.0 += 1; + if entry.2.len() < 5 { + let var = d.variable.clone().unwrap_or_else(|| "".to_string()); + entry.2.push(format!("{}::{}", d.model, var)); + } + } + + println!("\n=== unit-diagnostic buckets (by message template) ==="); + let mut sorted: Vec<_> = buckets.into_iter().collect(); + sorted.sort_by_key(|(_, (count, _, _))| std::cmp::Reverse(*count)); + for ((tag, _template), (count, example, vars)) in &sorted { + println!("\n[{count:>4}] {tag}"); + println!(" e.g.: {example}"); + println!(" vars: {}", vars.join(", ")); + } + + // Show a sample of the non-unit diagnostics too. + if !non_unit.is_empty() { + println!("\n=== non-unit diagnostics (first 20) ==="); + for d in non_unit.iter().take(20) { + println!(" {}::{:?} {:?}", d.model, d.variable, variant_tag(d)); + } + } + + // Per-model breakdown of the dominant "can't find or no units for + // dependency" category. + let mut per_model: BTreeMap = BTreeMap::new(); + for d in &unit_diags { + if diag_details(d).contains("can't find or no units for dependency") { + *per_model.entry(d.model.clone()).or_default() += 1; + } + } + println!("\n=== 'can't find units for dependency' by model ==="); + let mut pm: Vec<_> = per_model.into_iter().collect(); + pm.sort_by_key(|(_, count)| std::cmp::Reverse(*count)); + for (model, count) in &pm { + println!(" {count:>4} {model}"); + } + + // Census of macro-marked models in the datamodel. + println!("\n=== macro-marked models in datamodel ==="); + for model in &project.models { + if model.macro_spec.is_some() { + println!(" macro: {} ({} vars)", model.name, model.variables.len()); + } + } +} + +/// Regression guard: the C-LEARN unit-error flood (481 spurious diagnostics) +/// must stay cleared. Asserts the invariants established by the four fixes: +/// +/// F1/F2 (yr/year alias): no diagnostic confuses `yr` with `year`. +/// F3 (unknown deps): no "can't find or no units for dependency" errors. +/// F4 (macro templates): no diagnostic is attributed to a macro model. +/// +/// A small documented residual (~14) remains -- genuine-looking dimensional +/// subtleties Vensim tolerates (permafrost-methane `degreesc`, `ph`, an +/// IF-branch unit difference) plus one umbrella inference warning -- so we +/// also assert a coarse total bound to catch gross regressions without pinning +/// the exact residual. +#[test] +#[ignore = "loads the 1.4MB C-LEARN model; run explicitly with --ignored"] +fn clearn_unit_error_flood_is_cleared() { + let project = load_clearn(); + let macro_models: Vec = project + .models + .iter() + .filter(|m| m.macro_spec.is_some()) + .map(|m| m.name.clone()) + .collect(); + + let mut db = SimlinDb::default(); + let sync = sync_from_datamodel_incremental(&mut db, &project, None); + let diagnostics = collect_all_diagnostics(&db, &sync.to_sync_result()); + let unit_diags: Vec<&Diagnostic> = diagnostics.iter().filter(|d| is_unit_diag(d)).collect(); + + let render = |ds: &[&Diagnostic]| { + ds.iter() + .map(|d| format!(" {}.{:?}: {}", d.model, d.variable, diag_details(d))) + .collect::>() + .join("\n") + }; + + // F3: unknown dependency units are tolerated, never a hard error. + let cant_find: Vec<&Diagnostic> = unit_diags + .iter() + .copied() + .filter(|d| diag_details(d).contains("can't find or no units for dependency")) + .collect(); + assert!( + cant_find.is_empty(), + "F3 regression: {} 'can't find units for dependency' error(s):\n{}", + cant_find.len(), + render(&cant_find) + ); + + // F4: macro-marked models (templates) are never unit-checked. + let macro_attributed: Vec<&Diagnostic> = unit_diags + .iter() + .copied() + .filter(|d| macro_models.iter().any(|m| m == &d.model)) + .collect(); + assert!( + macro_attributed.is_empty(), + "F4 regression: {} diagnostic(s) attributed to macro models {:?}:\n{}", + macro_attributed.len(), + macro_models, + render(¯o_attributed) + ); + + // F1/F2: `yr` and `year` resolve to the same canonical unit, so no + // diagnostic mentions both as distinct units. ("year" does not contain the + // substring "yr", so this only fires when both tokens genuinely appear.) + let yr_year: Vec<&Diagnostic> = unit_diags + .iter() + .copied() + .filter(|d| { + let s = diag_details(d); + s.contains("yr") && s.contains("year") + }) + .collect(); + assert!( + yr_year.is_empty(), + "F1/F2 regression: {} diagnostic(s) confusing yr/year:\n{}", + yr_year.len(), + render(&yr_year) + ); + + // Coarse guard against gross regressions (was 481; documented residual ~14). + assert!( + unit_diags.len() <= 20, + "unit-diagnostic count regressed to {} (expected the documented \ + residual of ~14); all unit diagnostics:\n{}", + unit_diags.len(), + render(&unit_diags) + ); +} diff --git a/src/simlin-engine/tests/unit_alias_module_inference.rs b/src/simlin-engine/tests/unit_alias_module_inference.rs new file mode 100644 index 000000000..bd85fb9e6 --- /dev/null +++ b/src/simlin-engine/tests/unit_alias_module_inference.rs @@ -0,0 +1,166 @@ +// Copyright 2026 The Simlin Authors. All rights reserved. +// Use of this source code is governed by the Apache License, +// Version 2.0, that can be found in the LICENSE file. + +//! Minimal reproducers for the C-LEARN unit-error flood, isolated from the +//! 1.4MB model. Each test targets one root cause: +//! +//! F2: unit *inference* builds the Time variable and the stock/flow +//! constraint from the raw `sim_specs.time_units` string instead of +//! resolving it through the units `Context`'s alias map, so a model that +//! declares some units with an aliased time name (`yr`) and uses the +//! primary (`year`) elsewhere produces a spurious `year` vs `yr` +//! inference mismatch even after the `Context` self-alias fix. +//! +//! F3: unit *checking* must treat a dependency whose units are unknown (a +//! module output / synthesized helper that inference did not resolve) as +//! unconstrained -- skipping the consistency check -- rather than +//! emitting a hard "can't find or no units for dependency" error. The +//! array-element path already does this (units_check.rs); the scalar / +//! binary path must too. +//! +//! F4: unit checking must skip macro-marked models (generic templates whose +//! formal parameters are unitless), exactly as it already skips stdlib +//! models. + +use simlin_engine::common::UnitError; +use simlin_engine::db::{ + Diagnostic, DiagnosticError, SimlinDb, collect_all_diagnostics, sync_from_datamodel_incremental, +}; +use simlin_engine::test_common::TestProject; + +fn diag_details(d: &Diagnostic) -> String { + match &d.error { + DiagnosticError::Unit(UnitError::ConsistencyError(_, _, Some(s))) => s.clone(), + DiagnosticError::Unit(UnitError::InferenceError { + details: Some(s), .. + }) => s.clone(), + DiagnosticError::Unit(UnitError::DefinitionError(_, Some(s))) => s.clone(), + DiagnosticError::Unit(other) => format!("{:?}", other), + DiagnosticError::Model(e) => e.details.as_deref().unwrap_or("").to_string(), + _ => String::new(), + } +} + +/// Collect every diagnostic (across all models) for a built datamodel. +fn diagnostics_for(project: &simlin_engine::datamodel::Project) -> Vec { + let mut db = SimlinDb::default(); + let sync = sync_from_datamodel_incremental(&mut db, project, None); + collect_all_diagnostics(&db, &sync.to_sync_result()) +} + +/// F2: a stock/flow pair whose flow units are declared with the *aliased* +/// time name (`yr`) while the model's time units use the *primary* name +/// (`year`). Vensim treats `Yr,year,years,yr,...` as one unit, so this model +/// is dimensionally consistent and must produce no `year`/`yr` mismatch. +/// +/// The `Context` self-alias fix (F1) makes declared-vs-declared comparisons +/// resolve, but unit *inference* still injects the raw time-unit name `year` +/// into the stock/flow constraint, conflicting with the flow's declared `yr`. +#[test] +fn aliased_time_unit_does_not_cause_inference_mismatch() { + // `Yr` is the primary; `yr` and `year` are aliases. `year` is also the + // declared time unit -- exactly C-LEARN's `22:Yr,year,years,yr,Year,Years`. + let project = TestProject::new("aliased_time") + .with_time_units("year") + .unit_with_aliases("Yr", &["year", "years", "yr", "Year", "Years"]) + .unit("widgets", None) + .stock_with_units("store", "0", &["inflow"], &[], Some("widgets")) + .flow_with_units("inflow", "1", Some("widgets/yr")) + .build_datamodel(); + + // The model is fully dimensionally consistent (store: widgets, inflow: + // widgets/yr == widgets/year), so there must be no unit diagnostics at all. + let diagnostics = diagnostics_for(&project); + + assert!( + diagnostics.is_empty(), + "aliased time unit (yr == year) must not produce any unit diagnostic. Got {}:\n{}", + diagnostics.len(), + diagnostics + .iter() + .map(|d| format!(" {}.{:?}: {}", d.model, d.variable, diag_details(d))) + .collect::>() + .join("\n") + ); +} + +/// F3: a variable with declared units that references a dependency whose units +/// are unknown must NOT produce a hard "can't find or no units for dependency" +/// error. Unknown units are not a mismatch -- like Vensim (which only warns +/// that the dependency itself lacks units) and like the existing arrayed +/// element path in `units_check`, the consistency check must be skipped when a +/// dependency's units cannot be determined. This is the mechanism behind the +/// 366 main-model "can't find units" errors in C-LEARN (references to module +/// outputs / synthesized helper auxes whose units inference left unresolved). +/// +/// We force a genuinely-unresolvable dependency with an under-determined +/// product: `result (meters) = driver1 * driver2` constrains only the PRODUCT +/// `driver1 * driver2 == meters`, so inference cannot isolate either factor and +/// leaves both unresolved. (A direct `result = driver` would let inference +/// back-propagate `result`'s declared units onto `driver`.) +#[test] +fn unknown_dependency_units_do_not_hard_error() { + let project = TestProject::new("unknown_dep") + .with_time_units("year") + .unit("meters", None) + // Neither driver has declared units, and the product under-determines + // them, so both are unknown-unit dependencies of `result`. + .aux("driver1", "5", None) + .aux("driver2", "3", None) + .aux_with_units("result", "driver1 * driver2", Some("meters")) + .build_datamodel(); + + let diagnostics = diagnostics_for(&project); + + let cant_find: Vec<_> = diagnostics + .iter() + .filter(|d| diag_details(d).contains("can't find or no units for dependency")) + .collect(); + + assert!( + cant_find.is_empty(), + "a reference to an unknown-units dependency must not hard-error. Got {}:\n{}", + cant_find.len(), + cant_find + .iter() + .map(|d| format!(" {}.{:?}: {}", d.model, d.variable, diag_details(d))) + .collect::>() + .join("\n") + ); +} + +/// Regression guard for F3: tolerating *unknown* dependency units must NOT +/// silence a *genuine* mismatch between two KNOWN units. `result` is declared +/// `meters` but computes `seconds`, so a "computed units don't match" error +/// must still be reported. +#[test] +fn known_unit_mismatch_is_still_caught() { + let project = TestProject::new("known_mismatch") + .with_time_units("year") + .unit("meters", None) + .unit("seconds", None) + .aux_with_units("source", "5", Some("seconds")) + .aux_with_units("result", "source", Some("meters")) + .build_datamodel(); + + let diagnostics = diagnostics_for(&project); + + let mismatch: Vec<_> = diagnostics + .iter() + .filter(|d| { + let details = diag_details(d); + details.contains("computed units 'seconds' don't match specified units") + }) + .collect(); + + assert!( + !mismatch.is_empty(), + "a genuine meters-vs-seconds mismatch must still be caught. All diagnostics:\n{}", + diagnostics + .iter() + .map(|d| format!(" {}.{:?}: {}", d.model, d.variable, diag_details(d))) + .collect::>() + .join("\n") + ); +} diff --git a/src/simlin-engine/tests/wrld3_unit_errors.rs b/src/simlin-engine/tests/wrld3_unit_errors.rs index b8fe6b1fa..00b7fd6fc 100644 --- a/src/simlin-engine/tests/wrld3_unit_errors.rs +++ b/src/simlin-engine/tests/wrld3_unit_errors.rs @@ -7,7 +7,7 @@ //! Bug 1 (delay3 conflation): the stdlib `delay3` / `delay1` stock-init //! equation has the form `(if isModuleInput(initial_value) then initial_value //! else input) * delay_time`. The stdlib-module argument compatibility check -//! in `db::check_model_units` extracts ALL identifiers referenced by the init +//! in `db_units::check_model_units` extracts ALL identifiers referenced by the init //! AST and pairwise-compares their declared units, so `input` (pollution/year) //! and `delay_time` (year) are wrongly flagged as conflicting even though //! `delay_time` participates only as a coefficient. Only the identifiers in