From cecd0608b5e04d79b1c25697097cd3a6de190324 Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 8 Jun 2026 18:32:50 -0700 Subject: [PATCH 1/3] engine: make 2-D VECTOR SORT ORDER p a hard genuine-Vensim gate The #585 fix made VECTOR SORT ORDER rank per iterated row (innermost dim is the sorted axis, 0-based), so the engine now produces p = [0,1,1,0,0,1] for the 2-D p[DimA,DimB] = VECTOR SORT ORDER(o[DimA,DimB], ASCENDING), matching test/sdeverywhere/models/vector/vector.dat exactly. Real-Vensim multi- dimensional VSO semantics are independently confirmed by C-LEARN (simulates_clearn vs Ref.vdf), so the dormant fixture is no longer ambiguous. Removes p from the simulates_vector_xmile_genuine exclusion list, leaving only y (GitHub #578, a separate ELM MAP compile gap). Closes #576. --- .../tests/integration/simulate.rs | 59 +++++++++---------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/simlin-engine/tests/integration/simulate.rs b/src/simlin-engine/tests/integration/simulate.rs index fbf655882..45d4a5386 100644 --- a/src/simlin-engine/tests/integration/simulate.rs +++ b/src/simlin-engine/tests/integration/simulate.rs @@ -1696,16 +1696,14 @@ corpus_tests! { // source array, out-of-range -> :NA:, no modulo). vector.xmile is // exercised through all three comparison paths by the dedicated // `simulates_vector_xmile_genuine` test below, NOT here: that test keeps - // c/f/g (the ELM MAP base/full-source variables) and every other - // variable as hard genuine-Vensim gates against - // test/sdeverywhere/models/vector/vector.dat, narrowing the comparison - // to exclude only two pre-existing, separately-tracked, out-of-scope - // variables -- `y` (GitHub #578: scalar-source/expression-offset ELM MAP - // does not compile) and `p` (GitHub #576: dormant/unverified 2-D VECTOR - // SORT ORDER fixture data; a different builtin, unchanged here). This - // list runs an unconditional full comparison, which cannot carve out - // those variables; the narrowed gate lives in its own test instead of - // weakening every model's comparison. + // c/f/g (the ELM MAP base/full-source variables), the 2-D VECTOR SORT + // ORDER `p`, and every other variable as hard genuine-Vensim gates + // against test/sdeverywhere/models/vector/vector.dat, narrowing the + // comparison to exclude only one pre-existing, separately-tracked, + // out-of-scope variable -- `y` (GitHub #578: scalar-source/expression- + // offset ELM MAP does not compile). This list runs an unconditional full + // comparison, which cannot carve out that variable; the narrowed gate + // lives in its own test instead of weakening every model's comparison. // "test/sdeverywhere/models/vector/vector.xmile", // -> simulates_vector_xmile_genuine // // --- Permanently excluded (not test models) --- @@ -1736,25 +1734,27 @@ corpus_tests! { /// exercises (1-D VSO `l`/`m`, VECTOR SELECT `q`/`r`/`s`, reducers /// `u`/`v`/`w`, and the rest). /// -/// Exactly two variables are carved out, both pre-existing and -/// separately-tracked gaps unrelated to the ELM MAP base/full-source fix -/// (per the phase file's "prefer full inclusion; narrow only with a tracked -/// issue" guidance). First, `y[DimA] = VECTOR ELM MAP(x[three], (DimA-1))` -/// (GitHub #578): a scalar source plus an arithmetic (expression) offset -/// from which ELM MAP cannot yet infer a result shape, so `y` does not -/// compile at all -- a compiler shape-inference gap, NOT the base/stride -/// numeric bug fixed here; its genuine value `y[A1]=3,y[A2]=4,y[A3]=5` is -/// in `vector.dat`, and closing #578 lets `y` rejoin this gate. Second, -/// `p[DimA,DimB] = VECTOR SORT ORDER(o[DimA,DimB], ASCENDING)` (GitHub -/// #576): a genuinely 2-D VSO whose `vector.dat` `p` block is internally -/// inconsistent / encodes an sdeverywhere per-row semantic, with -/// genuine-Vensim multi-dimensional VSO semantics unverified by any live -/// fixture -- a different builtin (VSO, unchanged by this phase) out of -/// Phase 5's ELM MAP scope. +/// The 2-D `p[DimA,DimB] = VECTOR SORT ORDER(o[DimA,DimB], ASCENDING)` is +/// now a hard gate too (GitHub #576, closed): the #585 fix made VECTOR SORT +/// ORDER rank *per iterated row* (innermost dim = sorted axis, 0-based), so +/// the engine now produces `p = [0,1,1,0,0,1]`, matching `vector.dat` +/// exactly. Real-Vensim multi-dimensional VSO semantics are independently +/// confirmed by C-LEARN (`simulates_clearn` vs `Ref.vdf`), so the dormant +/// fixture is no longer ambiguous and `p` is checked here directly. /// -/// Excluded variables are dropped from the compiled model (so #578's +/// Exactly one variable is carved out, a pre-existing and separately-tracked +/// gap unrelated to the ELM MAP base/full-source fix (per the phase file's +/// "prefer full inclusion; narrow only with a tracked issue" guidance): +/// `y[DimA] = VECTOR ELM MAP(x[three], (DimA-1))` (GitHub #578), a scalar +/// source plus an arithmetic (expression) offset from which ELM MAP cannot +/// yet infer a result shape, so `y` does not compile at all -- a compiler +/// shape-inference gap, NOT the base/stride numeric bug fixed here; its +/// genuine value `y[A1]=3,y[A2]=4,y[A3]=5` is in `vector.dat`, and closing +/// #578 lets `y` rejoin this gate. +/// +/// The excluded variable is dropped from the compiled model (so #578's /// non-compiling `y` cannot abort the project) and skipped in every -/// comparison; the genuine gate on `c`/`f`/`g` (and all other variables) +/// comparison; the genuine gate on `c`/`f`/`g`/`p` (and all other variables) /// is NOT weakened. #[test] fn simulates_vector_xmile_genuine() { @@ -1762,9 +1762,8 @@ fn simulates_vector_xmile_genuine() { "../../test/sdeverywhere/models/vector/vector.xmile", compile_vm, // y: GitHub #578 (scalar-source/expression-offset ELM MAP compile - // gap). p: GitHub #576 (dormant/unverified 2-D VSO fixture data). - // Both pre-existing and out of Phase 5's ELM MAP scope. - &["y", "p"], + // gap), pre-existing and out of Phase 5's ELM MAP scope. + &["y"], ); } From 02a1951f0accff05109ecf38c96c07db1972c959 Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 8 Jun 2026 18:41:18 -0700 Subject: [PATCH 2/3] engine: numerically gate VECTOR ELM MAP full_source_len on the strict-slice path The full-array-source out_of_bounds_element_returns_nan_{vm,monolithic} tests already catch a corrupted full_source_len (both the codegen computation and the renumber_opcode/resolve path) -- contrary to GH #579's framing, which only checked the three genuine-Vensim .dat simulate gates (none of which have an out-of-range offset). The genuine residual gap was the STRICT-SLICE source branch (base != 0, source_is_full_array == false): no end-to-end test fed an out-of-range offset through it, so full_source_len's role there -- both the full-array-vs-strict-slice decision and the [0, full_source_len) -> :NA: guard -- was unverified numerically. Adds strict_slice_source_oob_returns_nan_{vm,monolithic}: a cross-dimension d[DimA,B1] source whose A3 row offset (5) pushes the lookup to flat 9, past d's 6-element storage, yielding NaN. Inflating full_source_len makes that read land in-bounds (a finite value) and deflating it flips the branch to full-array (base forced to 0); either corruption drops the expected NaN and fails the assertion. Verified red under both corruption sites, green when correct. Updates the symbolic.rs comment that claimed full_source_len was behaviorally invisible end-to-end. Closes #579. --- src/simlin-engine/src/array_tests.rs | 73 ++++++++++++++++++++++ src/simlin-engine/src/compiler/symbolic.rs | 39 ++++++------ 2 files changed, 94 insertions(+), 18 deletions(-) diff --git a/src/simlin-engine/src/array_tests.rs b/src/simlin-engine/src/array_tests.rs index 8f9a7b500..a84b7e62b 100644 --- a/src/simlin-engine/src/array_tests.rs +++ b/src/simlin-engine/src/array_tests.rs @@ -3643,6 +3643,79 @@ mod vector_elm_map_tests { } } + // GH #579: end-to-end numeric gate on `full_source_len` for a STRICT-SLICE + // ELM MAP source (the cross-dimension `d[DimA,B1]` shape, base != 0). The + // existing `out_of_bounds_element_returns_nan_*` tests use a *full-array* + // source (`source[*]`, base 0, `source_is_full_array == true`); this one + // covers the other branch, where `full_source_len` drives BOTH the + // full-array-vs-strict-slice decision AND the out-of-range -> :NA: guard. + // + // Fixture: same `d`/shape as `cross_dimension_source_resolves_full_array`, + // but `a[A3] = 5` pushes the A3 row's lookup past the source's full + // 6-element storage: + // d full storage (row-major [DimA,DimB], strides [2,1]): + // 0=d11=1 1=d12=4 2=d21=2 3=d22=5 4=d31=3 5=d32=6 + // A1,* : base = 0, offset 0 -> flat 0 -> d[0]=1 + // A2,* : base = 2, offset 1 -> flat 3 -> d[3]=5 + // A3,* : base = 4, offset 5 -> flat 9 >= full_source_len(6) -> :NA: (NaN) + // f row-major [DimA,DimB], broadcast across DimB: [1,1, 5,5, NaN,NaN]. + // + // Why this catches a corrupted `full_source_len`: inflating it (e.g. to 99) + // makes flat 9 pass the `[0, full_len)` guard and read past `d`'s storage + // instead of yielding NaN; deflating it to the 3-element slice size flips + // `source_is_full_array` to true (base forced to 0), changing every row. + // Either way the A3 elements stop being NaN, so the assertion fails -- + // unlike the genuine-Vensim `.dat` simulate corpus, which has no + // out-of-range offset on a strict-slice source. + fn make_strict_slice_oob_project(name: &str) -> TestProject { + TestProject::new(name) + .named_dimension("DimA", &["A1", "A2", "A3"]) + .named_dimension("DimB", &["B1", "B2"]) + .array_with_ranges("a[DimA]", vec![("A1", "0"), ("A2", "1"), ("A3", "5")]) + .array_with_ranges( + "d[DimA,DimB]", + vec![ + ("A1,B1", "1"), + ("A2,B1", "2"), + ("A3,B1", "3"), + ("A1,B2", "4"), + ("A2,B2", "5"), + ("A3,B2", "6"), + ], + ) + .array_aux("f[DimA,DimB]", "vector_elm_map(d[DimA,B1], a[DimA])") + } + + fn assert_strict_slice_oob(vals: &[f64]) { + assert_eq!(vals.len(), 6, "expected 6 elements (DimA x DimB)"); + let finite_expected = [(0usize, 1.0), (1, 1.0), (2, 5.0), (3, 5.0)]; + for &(i, want) in &finite_expected { + assert!( + (vals[i] - want).abs() < 1e-9, + "f[{i}]: expected {want}, got {} (full vals: {vals:?})", + vals[i] + ); + } + assert!( + vals[4].is_nan() && vals[5].is_nan(), + "f[A3,*] (base 4 + offset 5 = flat 9 >= full source len 6): expected NaN, got [{}, {}]", + vals[4], + vals[5] + ); + } + + #[test] + fn strict_slice_source_oob_returns_nan_vm() { + let project = make_strict_slice_oob_project("vem_slice_oob_vm"); + assert_strict_slice_oob(&project.vm_result_incremental("f")); + } + + #[test] + fn strict_slice_source_oob_returns_nan_monolithic() { + let project = make_strict_slice_oob_project("vem_slice_oob_mono"); + assert_strict_slice_oob(&project.vm_result("f")); + } + // AC6.4 defense-in-depth: a 1-D source[*] argument has base = 0 for all // result elements (no ActiveDimRef element reference), so genuine Vensim // reduces to result[i] = source[round(offset[i])] over the full source. diff --git a/src/simlin-engine/src/compiler/symbolic.rs b/src/simlin-engine/src/compiler/symbolic.rs index acbabb828..4d09457cc 100644 --- a/src/simlin-engine/src/compiler/symbolic.rs +++ b/src/simlin-engine/src/compiler/symbolic.rs @@ -1185,24 +1185,27 @@ pub(crate) fn resolve_opcode( // `renumber_opcode` and copied through unchanged on fragment // concatenation (see the matching arm in `renumber_opcode`). // - // Roundtrip coverage of this invariant lives in the unit tests, - // NOT the end-to-end simulate gates, and intentionally so: - // `vm_vector_elm_map` only consumes `full_source_len` for those - // two purposes, and the genuine-Vensim corpus - // (`vector_simple.dat` / `vector.dat`) deliberately has no - // out-of-range offset and no shape that flips the full-array - // branch, so an inflated `full_source_len` is behaviorally - // invisible through `simulates_vector_simple_mdl` / - // `simulates_vector_xmile_genuine` (verified: those gates pass - // even with `full_source_len` hard-forced to a wrong constant in - // `renumber_opcode`). The authoritative regression coverage is - // therefore the symbolic path itself: - // `test_renumber_vector_builtin_temp_ids` (isolated - // `renumber_opcode`) and - // `test_vector_elm_map_full_source_len_survives_fragment_roundtrip` - // (the full `symbolize` -> `concatenate_fragments` (renumber at a - // non-zero temp offset) -> `resolve_bytecode` merge path), which - // fail loudly if this field is ever shifted. + // The genuine-Vensim `.dat` simulate corpus (`vector_simple.dat` / + // `vector.dat`) deliberately has no out-of-range offset and no + // shape that flips the full-array branch, so a wrong + // `full_source_len` is invisible through `simulates_vector_simple_mdl` + // / `simulates_vector_xmile_genuine` alone. The NUMERIC end-to-end + // coverage (GH #579) therefore lives in `array_tests`: the + // full-array-source `out_of_bounds_element_returns_nan_{vm, + // monolithic}` (base 0, `source_is_full_array == true`) and the + // strict-slice-source `strict_slice_source_oob_returns_nan_{vm, + // monolithic}` (base != 0, the other branch) both feed an + // out-of-range offset, so a `full_source_len` corrupted in EITHER + // the codegen computation (`codegen::full_source_len`) OR this + // `resolve`/`renumber_opcode` path stops yielding the expected NaN + // and the assertions fail loudly (verified by hard-forcing a wrong + // constant in both sites). The structural symbolic round-trip -- + // `test_renumber_vector_builtin_temp_ids` (isolated `renumber_opcode`) + // and `test_vector_elm_map_full_source_len_survives_fragment_roundtrip` + // (the full `symbolize` -> `concatenate_fragments` -> `resolve_bytecode` + // merge path) -- complements them by pinning that this field is + // invariant under renumbering (it is NOT a renumber-able resource id + // like temp/lit/gf/view/dim_list/module). full_source_len: *full_source_len, }), SymbolicOpcode::VectorSortOrder { write_temp_id } => Ok(Opcode::VectorSortOrder { From 2fb17c20e6e2b8d7a863f1adf4468fafd78ebb43 Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 8 Jun 2026 20:48:05 -0700 Subject: [PATCH 3/3] engine: compile scalar-source / expression-offset VECTOR ELM MAP y[DimA] = VECTOR ELM MAP(x[three], (DimA-1)) -- a fully-collapsed scalar source element reference plus a per-element arithmetic offset -- previously failed to compile (NotSimulatable, "failed to compile fragments"). Two coupled gaps caused it: 1. Inside an array-producing builtin a fully-collapsed source subscript like x[three] was promoted back to the whole source array (Wildcard), discarding the element's base offset. That is only correct when the element is index 0 (the historical b[B1] case); for x[three] (base 2) the base was lost. The collapsed scalar view is now kept as a StaticSubscript whose view.offset is the element's flat index, so codegen::full_source_len recovers the full source length and the VM maps from the right base. 2. The per-element offset (DimA-1) lowers to a compile-time constant, which the array-producing ELM MAP opcode cannot consume (it needs a *view* offset). A new compile-time fold rewrites VECTOR ELM MAP(scalar_source, const_offset) into a direct read of the source variable's full storage at off + base + round(offset), or :NA: (NaN) when that flat index is out of range. The fold recurses through Op2/If so a nested form (10 + ELM MAP(...)) folds too, but never descends into reducer arguments where the ELM MAP must stay an array. Because the fold emits plain slot reads, no ELM MAP opcode is generated for this shape and the wasm backend needs no change (confirmed by the wasm parity hook and simulates_clearn_wasm). The genuine-Vensim vector.xmile gate is now fully un-narrowed -- y rejoins c/f/g/p and every other variable as a hard equality against vector.dat. Closes #578. --- src/simlin-engine/src/array_tests.rs | 100 ++++++++++++++++++ src/simlin-engine/src/compiler/context.rs | 61 ++++++----- src/simlin-engine/src/compiler/mod.rs | 93 ++++++++++++++++ .../tests/integration/simulate.rs | 66 +++++------- 4 files changed, 254 insertions(+), 66 deletions(-) diff --git a/src/simlin-engine/src/array_tests.rs b/src/simlin-engine/src/array_tests.rs index a84b7e62b..7e648fd26 100644 --- a/src/simlin-engine/src/array_tests.rs +++ b/src/simlin-engine/src/array_tests.rs @@ -3474,6 +3474,106 @@ mod vector_select_action_tests { mod vector_elm_map_tests { use crate::test_common::TestProject; + // GH #578: a SCALAR source element reference (`x[three]`, a single + // fully-collapsed element) plus an arithmetic per-element OFFSET + // expression (`(DimA - 1)`, a dimension-position term). Genuine Vensim + // maps over the source variable's FULL storage from the base the element + // reference establishes: result[i] = x_full[base(three) + round(off_i)]. + // + // x[DimX] = 1,2,3,4,5 (DimX = one,two,three,four,five; full storage + // flat 0..4) + // base(three) = 2 (0-based flat index of element `three`) + // off_i = (DimA - 1): A1->0, A2->1, A3->2 (DimA is the 1-based position) + // y[A1] = x_full[2+0] = 3 + // y[A2] = x_full[2+1] = 4 + // y[A3] = x_full[2+2] = 5 + // This is the `y` variable of test/sdeverywhere/models/vector/vector.dat + // (y[A1]=3, y[A2]=4, y[A3]=5), previously excluded from the genuine gate. + fn make_scalar_source_dim_offset_project(name: &str) -> TestProject { + TestProject::new(name) + .named_dimension("DimA", &["A1", "A2", "A3"]) + .named_dimension("DimX", &["one", "two", "three", "four", "five"]) + .array_with_ranges( + "x[DimX]", + vec![ + ("one", "1"), + ("two", "2"), + ("three", "3"), + ("four", "4"), + ("five", "5"), + ], + ) + .array_aux("y[DimA]", "vector_elm_map(x[three], (DimA - 1))") + } + + #[test] + fn scalar_source_dim_offset_vm() { + let project = make_scalar_source_dim_offset_project("vem_scalar_src_vm"); + project.assert_vm_result_incremental("y", &[3.0, 4.0, 5.0]); + } + + #[test] + fn scalar_source_dim_offset_monolithic() { + let project = make_scalar_source_dim_offset_project("vem_scalar_src_mono"); + project.assert_vm_result("y", &[3.0, 4.0, 5.0]); + } + + // GH #578 nested: the same scalar-source ELM MAP wrapped in arithmetic + // (`10 + ...`). The fold recurses through the surrounding expression, so + // each element becomes `10 + x_full[2 + off_i]` = 13, 14, 15. + #[test] + fn scalar_source_dim_offset_nested_vm() { + let project = TestProject::new("vem_scalar_src_nested_vm") + .named_dimension("DimA", &["A1", "A2", "A3"]) + .named_dimension("DimX", &["one", "two", "three", "four", "five"]) + .array_with_ranges( + "x[DimX]", + vec![ + ("one", "1"), + ("two", "2"), + ("three", "3"), + ("four", "4"), + ("five", "5"), + ], + ) + .array_aux("z[DimA]", "10 + vector_elm_map(x[three], (DimA - 1))"); + project.assert_vm_result_incremental("z", &[13.0, 14.0, 15.0]); + } + + // GH #578 out-of-range: a scalar source whose base + per-element offset + // walks off the end of the source's full storage yields :NA: (NaN). Here + // base(four) = 3, off_i = (DimA-1) = 0,1,2 -> flat 3,4,5; flat 5 is past + // x's 5-element storage -> NaN. + // y2[A1] = x_full[3] = 4 + // y2[A2] = x_full[4] = 5 + // y2[A3] = x_full[5] = :NA: (NaN) + #[test] + fn scalar_source_dim_offset_oob_returns_nan_vm() { + let project = TestProject::new("vem_scalar_src_oob_vm") + .named_dimension("DimA", &["A1", "A2", "A3"]) + .named_dimension("DimX", &["one", "two", "three", "four", "five"]) + .array_with_ranges( + "x[DimX]", + vec![ + ("one", "1"), + ("two", "2"), + ("three", "3"), + ("four", "4"), + ("five", "5"), + ], + ) + .array_aux("y2[DimA]", "vector_elm_map(x[four], (DimA - 1))"); + let vals = project.vm_result_incremental("y2"); + assert_eq!(vals.len(), 3); + assert!((vals[0] - 4.0).abs() < 1e-9, "y2[A1]: {}", vals[0]); + assert!((vals[1] - 5.0).abs() < 1e-9, "y2[A2]: {}", vals[1]); + assert!( + vals[2].is_nan(), + "y2[A3] (flat 5 OOB): expected NaN, got {}", + vals[2] + ); + } + // source[D] = [10, 20, 30] (3 elements, valid indices 0..2) // offsets[D] = [0, 2, 5] -- index 5 exceeds source length, so third element -> NaN fn make_oob_project(name: &str) -> TestProject { diff --git a/src/simlin-engine/src/compiler/context.rs b/src/simlin-engine/src/compiler/context.rs index ce6ed35c5..5b9ed90cc 100644 --- a/src/simlin-engine/src/compiler/context.rs +++ b/src/simlin-engine/src/compiler/context.rs @@ -937,6 +937,21 @@ impl Context<'_> { sim_err!(DoesNotExist, "Variable not found by offset".to_string()) } + /// Full element count of the variable whose storage *begins* at `base_off` + /// (the product of its declared dimensions; 1 for a scalar). Returns `None` + /// if no variable owns that exact base offset. This is the compile-time + /// twin of `codegen::full_source_len`, used by the GH #578 scalar-source + /// VECTOR ELM MAP fold to bound the per-element static read. + pub(super) fn full_var_len_for_base(&self, base_off: usize) -> Option { + let md = self.get_variable_metadata_by_offset(base_off).ok()?; + Some( + md.var + .get_dimensions() + .map(|dims| dims.iter().map(|d| d.len()).product::().max(1)) + .unwrap_or(1), + ) + } + pub(super) fn build_stock_update_expr(&self, stock_off: usize, var: &Variable) -> Result { if let Variable::Stock { inflows, outflows, .. @@ -1387,33 +1402,25 @@ impl Context<'_> { return Ok(Expr::StaticSubscript(off, preserved_result.view, *loc)); } else { if view.dims.is_empty() { - // Inside array-producing builtins, a fully-collapsed - // subscript like b[B1] should be promoted back to the - // full source array. The Single ops came from named - // element subscripts (not ActiveDimRef resolution), so - // promoting them to Wildcard restores the array view - // that VectorElmMap/VectorSortOrder expect. - let has_single_ops = self.promote_active_dim_ref - && operations.iter().any(|op| matches!(op, IndexOp::Single(_))); - if has_single_ops { - let promoted_ops: Vec = operations - .iter() - .map(|op| match op { - IndexOp::Single(_) => IndexOp::Wildcard, - other => other.clone(), - }) - .collect(); - let promoted_result = build_view_from_ops( - &promoted_ops, - &orig_dims, - &orig_strides, - &view_config, - )?; - return Ok(Expr::StaticSubscript( - off, - promoted_result.view, - *loc, - )); + // Inside an array-producing builtin a fully- + // collapsed source element reference (e.g. + // `x[three]` or `b[B1]`) must keep the element's + // base offset so the builtin maps over the source + // variable's FULL storage starting from that base + // (genuine-Vensim VECTOR ELM MAP, GH #578). + // Return the collapsed scalar view as a + // StaticSubscript -- its `view.offset` is the + // element's flat index, and the variable base + // (`off`) lets `codegen::full_source_len` recover + // the full source length. The earlier code + // promoted the Single ops back to a whole-array + // Wildcard view, which discarded the base and was + // only correct when the element is index 0 (the + // historical `b[B1]` case, where base == 0). + if self.promote_active_dim_ref + && operations.iter().any(|op| matches!(op, IndexOp::Single(_))) + { + return Ok(Expr::StaticSubscript(off, view, *loc)); } return Ok(Expr::Var(off + view.offset, *loc)); } diff --git a/src/simlin-engine/src/compiler/mod.rs b/src/simlin-engine/src/compiler/mod.rs index 79cf293a0..06f9fbbb1 100644 --- a/src/simlin-engine/src/compiler/mod.rs +++ b/src/simlin-engine/src/compiler/mod.rs @@ -1885,6 +1885,12 @@ fn expand_a2a_hoisted( temp_id = temp_id.max(max + 1); } + // GH #578: fold any scalar-source / constant-offset ELM MAP + // nested in this element's expression to a direct read before + // the array-builtin hoister runs; whatever array-producing + // builtins remain are hoisted normally. + let elem_main = fold_scalar_source_elm_maps(ctx, elem_main); + let mut hoisted = Vec::new(); let elem_rewritten = replace_nested_builtins_for_element( elem_main, @@ -1940,6 +1946,80 @@ fn expand_a2a_hoisted( } } +/// GH #578: fold a single element of `VECTOR ELM MAP(scalar_source, offset)` +/// into a direct read when the source is a fully-collapsed element reference +/// and the per-element offset is a compile-time constant. +/// +/// Genuine Vensim maps the result over the source variable's FULL row-major +/// storage from the base the element reference establishes: +/// `result = source_full[base + round(offset)]`. When `source` is a scalar +/// `StaticSubscript` (its `view.offset` is the element's flat index and `off` +/// is the variable base) and `offset` folds to a constant, the whole read is +/// known at compile time: it is the variable slot `off + base + round(offset)`, +/// or `:NA:` (NaN) if that flat index is outside `[0, full_source_len)`. +/// +/// This is what lets a scalar-source / expression-offset ELM MAP compile at +/// all: the array-producing ELM MAP opcode needs a *view* offset, but here the +/// per-element offset lowers to a `Const` (e.g. `(DimA - 1)` -> `0, 1, 2`), +/// which is not a view. Returns `None` for any shape this fold does not cover +/// (non-scalar source, non-constant offset), leaving the normal path to run. +fn try_fold_scalar_source_elm_map(ctx: &Context, main_expr: &Expr) -> Option { + let Expr::App(BuiltinFn::VectorElmMap(source, offset), loc) = main_expr else { + return None; + }; + // Source must be a fully-collapsed (scalar) element reference carrying its + // base: a StaticSubscript with no remaining dimensions, whose `off` is the + // variable base and `view.offset` the element's flat index within it. + let (base_off, elem_flat) = match source.as_ref() { + Expr::StaticSubscript(off, view, _) if view.dims.is_empty() => (*off, view.offset), + _ => return None, + }; + // The per-element offset must be a compile-time constant (it is not a view, + // so the ELM MAP opcode could not consume it anyway). + let Expr::Const(offset_val, _) = fold::fold_constants((**offset).clone()) else { + return None; + }; + let full_len = ctx.full_var_len_for_base(base_off)?; + // round() matches the VM's `vm_vector_elm_map` per-element offset rounding. + let flat = elem_flat as i64 + offset_val.round() as i64; + if flat < 0 || flat >= full_len as i64 { + Some(Expr::Const(f64::NAN, *loc)) + } else { + Some(Expr::Var(base_off + flat as usize, *loc)) + } +} + +/// Recursively apply [`try_fold_scalar_source_elm_map`] through the +/// scalar-value wrappers of a per-element expression, so a scalar-source ELM +/// MAP nested in arithmetic (`10 + VECTOR ELM MAP(x[three], (DimA-1))`) folds +/// too (GH #578). +/// +/// Recursion is restricted to `Op2`/`If`, which propagate a scalar-value +/// context to their operands in this per-element lowering (unary minus is +/// already lowered to `Op2(Sub, 0, x)`). It deliberately does NOT descend into +/// `Expr::App` arguments: a reducer like `SUM(elm_map_array)` consumes the ELM +/// MAP as an *array*, and folding it to a single element there would be wrong. +fn fold_scalar_source_elm_maps(ctx: &Context, expr: Expr) -> Expr { + if let Some(folded) = try_fold_scalar_source_elm_map(ctx, &expr) { + return folded; + } + match expr { + Expr::Op2(op, l, r, loc) => Expr::Op2( + op, + Box::new(fold_scalar_source_elm_maps(ctx, *l)), + Box::new(fold_scalar_source_elm_maps(ctx, *r)), + loc, + ), + Expr::If(c, t, f, loc) => Expr::If( + Box::new(fold_scalar_source_elm_maps(ctx, *c)), + Box::new(fold_scalar_source_elm_maps(ctx, *t)), + Box::new(fold_scalar_source_elm_maps(ctx, *f)), + loc, + ), + other => other, + } +} + /// Per-element hoisting for array-producing builtins whose scalar arguments /// depend on the active dimension (e.g. `vector_sort_order(vals[*], dir[D])`). /// Each element gets its own AssignTemp so the builtin is re-evaluated with @@ -1991,6 +2071,19 @@ fn expand_a2a_per_element_hoisted( main }; + // GH #578: a scalar-source ELM MAP with a per-element constant offset + // folds to a direct slot read (or :NA:), so the array-producing opcode + // -- which requires a *view* offset the constant cannot supply -- is + // skipped entirely. If the fold collapses the whole element expression + // to a scalar (no array-producing builtin left), emit it directly with + // no temp consumed. + let main_expr = fold_scalar_source_elm_maps(ctx, main_expr); + if !is_array_producing_builtin(&main_expr) && !contains_array_producing_builtin(&main_expr) + { + result.push(Expr::AssignCurr(off + i, Box::new(main_expr))); + continue; + } + let temp_id = next_tid; next_tid = temp_id + 1; let builtin_view = find_expr_array_view(&main_expr).unwrap_or_else(|| var_view.clone()); diff --git a/src/simlin-engine/tests/integration/simulate.rs b/src/simlin-engine/tests/integration/simulate.rs index 45d4a5386..2c8bfdde3 100644 --- a/src/simlin-engine/tests/integration/simulate.rs +++ b/src/simlin-engine/tests/integration/simulate.rs @@ -1102,9 +1102,12 @@ fn simulate_path_with(xmile_path: &str, compile: CompileFn) { /// not-yet-supported variable does not block the whole model from /// compiling -- and (b) skipped in every comparison path (VM, protobuf /// round-trip, XMILE round-trip). Every *other* variable stays a hard -/// genuine-Vensim equality gate. Used to keep `vector.xmile`'s ELM MAP -/// base/full-source variables (`c`/`f`/`g`) as hard gates while excluding -/// only `y` (GitHub #578), rather than weakening the whole comparison. +/// genuine-Vensim equality gate. This is the general narrowing mechanism for +/// gating a model on the variables it DOES support while a separately-tracked +/// gap on one variable is fixed; no corpus model currently needs it (the last +/// user, `vector.xmile`'s `y`, became a full gate once GitHub #578 closed), but +/// it is kept as the backing implementation of [`simulate_path`] (`excluded` +/// = `&[]`) and for the next such gap. fn simulate_path_with_excluding(xmile_path: &str, compile: CompileFn, excluded: &[&str]) { eprintln!("model: {xmile_path}"); @@ -1695,15 +1698,13 @@ corpus_tests! { // VECTOR ELM MAP now matches genuine Vensim (per-element base + full // source array, out-of-range -> :NA:, no modulo). vector.xmile is // exercised through all three comparison paths by the dedicated - // `simulates_vector_xmile_genuine` test below, NOT here: that test keeps - // c/f/g (the ELM MAP base/full-source variables), the 2-D VECTOR SORT - // ORDER `p`, and every other variable as hard genuine-Vensim gates - // against test/sdeverywhere/models/vector/vector.dat, narrowing the - // comparison to exclude only one pre-existing, separately-tracked, - // out-of-scope variable -- `y` (GitHub #578: scalar-source/expression- - // offset ELM MAP does not compile). This list runs an unconditional full - // comparison, which cannot carve out that variable; the narrowed gate - // lives in its own test instead of weakening every model's comparison. + // `simulates_vector_xmile_genuine` test below, NOT here: that test gates + // EVERY variable in the model (including the ELM MAP base/full-source + // variables c/f/g, the scalar-source ELM MAP `y`, and the 2-D VECTOR SORT + // ORDER `p`) as a hard genuine-Vensim equality against + // test/sdeverywhere/models/vector/vector.dat, with no exclusions. It lives + // in its own test only so its provenance can be documented; the comparison + // is the same full one this list would run. // "test/sdeverywhere/models/vector/vector.xmile", // -> simulates_vector_xmile_genuine // // --- Permanently excluded (not test models) --- @@ -1734,37 +1735,24 @@ corpus_tests! { /// exercises (1-D VSO `l`/`m`, VECTOR SELECT `q`/`r`/`s`, reducers /// `u`/`v`/`w`, and the rest). /// -/// The 2-D `p[DimA,DimB] = VECTOR SORT ORDER(o[DimA,DimB], ASCENDING)` is -/// now a hard gate too (GitHub #576, closed): the #585 fix made VECTOR SORT -/// ORDER rank *per iterated row* (innermost dim = sorted axis, 0-based), so -/// the engine now produces `p = [0,1,1,0,0,1]`, matching `vector.dat` -/// exactly. Real-Vensim multi-dimensional VSO semantics are independently -/// confirmed by C-LEARN (`simulates_clearn` vs `Ref.vdf`), so the dormant -/// fixture is no longer ambiguous and `p` is checked here directly. +/// The 2-D `p[DimA,DimB] = VECTOR SORT ORDER(o[DimA,DimB], ASCENDING)` is a +/// hard gate (GitHub #576, closed): the #585 fix made VECTOR SORT ORDER rank +/// *per iterated row* (innermost dim = sorted axis, 0-based), so the engine +/// produces `p = [0,1,1,0,0,1]`, matching `vector.dat` exactly. Real-Vensim +/// multi-dimensional VSO semantics are independently confirmed by C-LEARN +/// (`simulates_clearn` vs `Ref.vdf`). /// -/// Exactly one variable is carved out, a pre-existing and separately-tracked -/// gap unrelated to the ELM MAP base/full-source fix (per the phase file's -/// "prefer full inclusion; narrow only with a tracked issue" guidance): -/// `y[DimA] = VECTOR ELM MAP(x[three], (DimA-1))` (GitHub #578), a scalar -/// source plus an arithmetic (expression) offset from which ELM MAP cannot -/// yet infer a result shape, so `y` does not compile at all -- a compiler -/// shape-inference gap, NOT the base/stride numeric bug fixed here; its -/// genuine value `y[A1]=3,y[A2]=4,y[A3]=5` is in `vector.dat`, and closing -/// #578 lets `y` rejoin this gate. +/// The scalar-source / expression-offset `y[DimA] = VECTOR ELM MAP(x[three], +/// (DimA-1))` is also a hard gate now (GitHub #578, closed): a fully-collapsed +/// source element reference keeps its base offset, and the per-element constant +/// offset folds to a direct read of the source's full storage +/// (`y[A1]=3,y[A2]=4,y[A3]=5` in `vector.dat`). /// -/// The excluded variable is dropped from the compiled model (so #578's -/// non-compiling `y` cannot abort the project) and skipped in every -/// comparison; the genuine gate on `c`/`f`/`g`/`p` (and all other variables) -/// is NOT weakened. +/// No variables are carved out: every variable in `vector.xmile` is a hard +/// genuine-Vensim equality gate. #[test] fn simulates_vector_xmile_genuine() { - simulate_path_with_excluding( - "../../test/sdeverywhere/models/vector/vector.xmile", - compile_vm, - // y: GitHub #578 (scalar-source/expression-offset ELM MAP compile - // gap), pre-existing and out of Phase 5's ELM MAP scope. - &["y"], - ); + simulate_path("../../test/sdeverywhere/models/vector/vector.xmile"); } #[test]