From 82909b8c128cecf9754291ee9ca8ce47b860b63a Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Sun, 24 May 2026 10:59:44 +0200 Subject: [PATCH 1/3] =?UTF-8?q?feat(vectorization):=20broaden=20tuple=20op?= =?UTF-8?q?erators=20and=20recover=20precise=20types=20=F0=9F=93=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends element-wise tuple broadcast beyond binary numeric operators — unary forms (`-(1,2,3)`), n-ary scalar overloads (`("a","b") ++ ("c","d")`), per-position heterogeneous dispatch, and scalar broadcast — and restores the type-inference precision PR #140 widened to `Any` for soundness. Vec dispatch is gated on a new `Expression::OperatorCall` AST variant emitted by the parser for operator desugars, so regular calls never accidentally broadcast over tuple arguments. The analyser resolves operator calls through a single `ScopeTree::resolve_call` walk that returns both the binding and the inferred return type, with per-position candidate lookups catching mixed-element tuples (`(1, "a") + (2, "b")`) at compile time instead of mid-iteration at runtime. When the analyser pins a homogeneous vec call to one scalar overload, the compiler emits a dedicated `OpCode::CallVec(args)` whose handler broadcasts a directly-loaded scalar across the tuple axis without any overload probing. `Object::OverloadSet` now stores scalars and vec candidates in separate `Vec`s so the hot scalar walk keeps master's footprint. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + benches/programs/vec_hot_loop.ndc | 10 + docs/design/vectorization.md | 104 +++++ manual/src/reference/types/tuple.md | 24 ++ ndc_analyser/src/analyser.rs | 182 ++++---- ndc_analyser/src/scope.rs | 393 ++++++++++++++++-- ndc_bin/src/highlighter.rs | 4 + ndc_core/src/static_type.rs | 27 -- ndc_lsp/src/visitor.rs | 4 + ndc_parser/src/expression.rs | 35 +- ndc_parser/src/lib.rs | 2 +- ndc_parser/src/operator.rs | 16 - ndc_parser/src/parser.rs | 10 +- ndc_stdlib/src/serde.rs | 2 +- ndc_vm/src/chunk.rs | 7 + ndc_vm/src/compiler.rs | 186 ++++++--- ndc_vm/src/value/mod.rs | 35 +- ndc_vm/src/vm.rs | 300 ++++++++----- tests/compiler/Cargo.toml | 1 + tests/compiler/tests/compiler.rs | 67 +++ tests/functional/build.rs | 2 +- .../013_vector_math/003_vector_error2.ndc | 5 +- .../013_vector_math/004_vector_unary.ndc | 9 + .../005_vector_non_numeric.ndc | 7 + .../006_vector_mixed_elements.ndc | 6 + .../007_regular_call_no_vec.ndc | 9 + .../008_vector_chain_precision.ndc | 13 + .../009_vector_exact_match_precision.ndc | 25 ++ .../010_vector_op_assignment.ndc | 23 + .../011_vector_op_assign_aliasing.ndc | 16 + .../012_vector_heterogeneous.ndc | 19 + .../013_vector_per_position_no_overload.ndc | 5 + ...c => bug0022_combinations_lazy_source.ndc} | 0 .../bug0023_incompat_dynamic_misinferred.ndc | 9 + 34 files changed, 1231 insertions(+), 327 deletions(-) create mode 100644 benches/programs/vec_hot_loop.ndc create mode 100644 docs/design/vectorization.md create mode 100644 tests/functional/programs/013_vector_math/004_vector_unary.ndc create mode 100644 tests/functional/programs/013_vector_math/005_vector_non_numeric.ndc create mode 100644 tests/functional/programs/013_vector_math/006_vector_mixed_elements.ndc create mode 100644 tests/functional/programs/013_vector_math/007_regular_call_no_vec.ndc create mode 100644 tests/functional/programs/013_vector_math/008_vector_chain_precision.ndc create mode 100644 
tests/functional/programs/013_vector_math/009_vector_exact_match_precision.ndc create mode 100644 tests/functional/programs/013_vector_math/010_vector_op_assignment.ndc create mode 100644 tests/functional/programs/013_vector_math/011_vector_op_assign_aliasing.ndc create mode 100644 tests/functional/programs/013_vector_math/012_vector_heterogeneous.ndc create mode 100644 tests/functional/programs/013_vector_math/013_vector_per_position_no_overload.ndc rename tests/functional/programs/900_bugs/{bug0021_combinations_lazy_source.ndc => bug0022_combinations_lazy_source.ndc} (100%) create mode 100644 tests/functional/programs/900_bugs/bug0023_incompat_dynamic_misinferred.ndc diff --git a/Cargo.lock b/Cargo.lock index 43510a3c..ee29880e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,7 @@ dependencies = [ "ndc_interpreter", "ndc_lexer", "ndc_parser", + "ndc_stdlib", "ndc_vm", ] diff --git a/benches/programs/vec_hot_loop.ndc b/benches/programs/vec_hot_loop.ndc new file mode 100644 index 00000000..5625505d --- /dev/null +++ b/benches/programs/vec_hot_loop.ndc @@ -0,0 +1,10 @@ +// Vec dispatch hot loop. Mirrors the per-iteration vec calls that drove +// the AoC 2025/08 regression in PR #141: a tight loop over millions of +// `Tuple<Int, Int> + Tuple<Int, Int>` calls. The CallVec fast path +// should bring this close to the master baseline. +let n = 200_000; +let acc = (0, 0); +for i in 0..n { + acc = acc + (i, i); +} +print(acc); diff --git a/docs/design/vectorization.md b/docs/design/vectorization.md new file mode 100644 index 00000000..ec6002a0 --- /dev/null +++ b/docs/design/vectorization.md @@ -0,0 +1,104 @@ +# Vectorization + +Operator syntax broadcasts element-wise over tuples. `a + b` where both +arguments are `Tuple<Int, Int>` resolves to two `+(Int, Int)` calls and a +tuple build. The mechanism is gated to operator syntax so regular function +calls never accidentally broadcast.
+ +## Background + +PR [#140] widened `Binding::Dynamic` return types to `Any` to fix issue +[#139]: the analyser had been LUB-ing declared overload returns, but the +value-level dispatcher could fall through to vec dispatch and produce a +value no declared overload returned. The widening pessimised every dynamic +caller — including ones with no vec path at all. The current design +restores that precision by tracking vec-ness on the binding rather than +on a separate fallback path, and broadens vec to cover n-ary operators +and non-numeric overloads. + +[#139]: https://github.com/timfennis/andy-cpp/issues/139 +[#140]: https://github.com/timfennis/andy-cpp/pull/140 + +## Three pieces + +### 1. `Expression::OperatorCall` distinguishes operator desugars + +The parser emits `Expression::OperatorCall { function, arguments }` for +`a + b`, `-x`, `op=`, and `not x` — same shape as `Call` but a distinct +variant. Downstream layers pattern-match exhaustively: the analyser opts +into vec dispatch on `OperatorCall` only, while `Call` keeps regular +semantics. No flag, no curated list of operator names anywhere outside +the parser. + +### 2. `Candidate` distinguishes scalar from vec overloads + +```rust +pub enum Candidate { + Scalar(ResolvedVar), + /// Element-wise tuple broadcast over the scalar that `var()` returns. + Vec(ResolvedVar), +} +``` + +`Binding::{Resolved,Dynamic}` carry `Candidate`/`Vec<Candidate>`. The +analyser pins `Resolved(Candidate::Vec(scalar))` when per-position +resolution unanimously picks one scalar; it carries a mixed list as +`Dynamic` when types aren't precise enough. + +### 3. Per-position vec resolution + +For an operator-form call `op(a₁, …, aₙ)` where at least one `aᵢ` is +statically a non-empty tuple of length `k`, the analyser: + +1. Builds a per-position signature for each `i ∈ 0..k`: tuple args + contribute `arg[i]`, scalar args broadcast unchanged. +2. Looks up scalar overloads for each position signature. +3.
**All positions pick the same scalar**: emit + `Binding::Resolved(Candidate::Vec(scalar))`, result type + `Tuple`. +4. **Mixed positions**: emit `Binding::Dynamic(merged_candidates)`, + result type = per-position LUB wrapped as `Tuple<…>`. +5. **Any position has zero candidates**: emit `Binding::None`. The call + can't succeed at runtime either, so we error at compile time with + `function_not_found`. + +## Runtime dispatch + +Two opcodes carry vec work: + +* `CallVec(args)` — the compiler emits this for `Resolved(Vec)`. The + scalar is loaded directly (no `OverloadSet` wrapper); the VM reads the + broadcast axis from the tuple args at runtime and calls the known + scalar `axis_len` times. This is the fast path that recovers the perf + the per-element re-probe would cost. + +* `Call(args)` with an `OverloadSet` callee — used for `Dynamic`. The + dispatcher walks candidates in priority order: scalars first + (first-match-wins), then vec candidates produce a `Callable::Vec` + carrying the list of scalars that the broadcast loop narrows per + element pair. The pinned-single-scalar case (one vec candidate) skips + the per-element probe via the same fast path `CallVec` uses. + +Element-call failures surface with `while vectorising '' at index N` +prefixed to the inner message, so the outer call and failing position +appear in the error. + +## What changed vs the old design + +| Old | New | +|---|---| +| Binary numeric vec only | n-ary, any scalar overload | +| `Binding::Dynamic` widened all returns to `Any` | LUB-d for pure scalar; precise `Tuple<…>` for vec | +| Runtime `try_vectorized_call` post-check | First-class candidate in `OverloadSet` + `CallVec` opcode | +| Mixed-element tuples crashed mid-iteration | Compile-time `function_not_found` | +| Unary `-(1, 2, 3)` errored | Broadcasts to `(-1, -2, -3)` | + +## Notes + +* **Per-position LUB collapse**: `(Int, Float) + (Float, Int)` infers + `Tuple` rather than the per-element-precise + `Tuple`. 
The simpler uniform return type keeps the + candidate list small; the cost is rare in practice. +* **Empty tuples** decline vec resolution — they have no broadcast axis. +* **Indexing** (`a[i]`) parses as `Call`, not `OperatorCall`: there's no + natural broadcast story for `(list_a, list_b)[i]`. diff --git a/manual/src/reference/types/tuple.md b/manual/src/reference/types/tuple.md index 3a047e45..d3e484c8 100644 --- a/manual/src/reference/types/tuple.md +++ b/manual/src/reference/types/tuple.md @@ -43,3 +43,27 @@ assert_eq(b, (1,2,3,4,5)); ## Operators {{#include ../../snippets/list-operators.md}} + +## Vectorization + +Operators broadcast element-wise over tuples. Both arguments must be tuples +of the same length, or one side may be a scalar that broadcasts: + +```ndc +assert_eq((1, 2) + (3, 4), (4, 6)); +assert_eq(-(1, 2, 3), (-1, -2, -3)); +assert_eq((1, 2) + 5, (6, 7)); +assert_eq(("a", "b") ++ ("c", "d"), ("ac", "bd")); +``` + +Vectorization only kicks in for operator syntax (`a + b`, `-x`, `a ++ b`). +Regular function calls never broadcast, so `f((1, 2, 3))` passes the whole +tuple to `f` and does not call `f` once per element. 
+ +Mixed-element tuples or length mismatches error at compile time rather than +silently producing wrong results: + +```ndc +(1, 2, 3) + (4, 5) // ERROR: no overload accepts those argument types +(1, "a") + (2, "b") // ERROR: no `+(String, String)` overload +``` diff --git a/ndc_analyser/src/analyser.rs b/ndc_analyser/src/analyser.rs index 626e8d94..30120fa2 100644 --- a/ndc_analyser/src/analyser.rs +++ b/ndc_analyser/src/analyser.rs @@ -1,13 +1,13 @@ use std::collections::HashMap; use std::fmt::Debug; -use crate::scope::{ScopeTree, TypeBinding}; +use crate::scope::{CallKind, ResolvedCall, ScopeTree, TypeBinding}; use itertools::{Itertools, izip}; use ndc_core::{StaticType, TypeSignature}; use ndc_lexer::Span; use ndc_parser::{ - Binding, Expression, ExpressionLocation, ForBody, ForIteration, FunctionParameter, Lvalue, - NodeId, + Binding, Candidate, Expression, ExpressionLocation, ForBody, ForIteration, FunctionParameter, + Lvalue, NodeId, }; /// Side table holding semantic information keyed by AST node identity. @@ -130,7 +130,7 @@ impl Analyser { return Ok(StaticType::Any); }; - *resolved = Binding::Resolved(binding); + *resolved = Binding::Resolved(Candidate::Scalar(binding)); Ok(self.scope_tree.get_type(binding).clone()) } @@ -202,36 +202,44 @@ impl Analyser { let right_type = self.analyse_or_any(r_value); let arg_types = vec![left_type, right_type]; - *resolved_assign_operation = self - .scope_tree - .resolve_function_binding(&format!("{operation}="), &arg_types); - *resolved_operation = self + // Both `op=` and `op` desugar from operator syntax, so vec + // dispatch is available for either path. Resolving `op` gives + // us the result type to widen the lvalue with — that's how + // `a += (3, 4)` on `Tuple` widens correctly instead + // of trying to widen with the scalar `+(Int,Int) -> Int`. + let ResolvedCall { + binding: assign_binding, + .. 
+ } = self.scope_tree.resolve_call( + &format!("{operation}="), + &arg_types, + CallKind::Operator, + ); + let ResolvedCall { + binding: op_binding, + return_type: op_return, + } = self .scope_tree - .resolve_function_binding(operation, &arg_types); + .resolve_call(operation, &arg_types, CallKind::Operator); - if let Binding::None = resolved_operation { + *resolved_assign_operation = assign_binding; + *resolved_operation = op_binding; + + // Either form satisfies the call: `op=` mutates in place; + // `op` falls back through `a = a op b`. Only error when both + // are missing — e.g. `Map -= Map` is fine via `-=` even when + // `-` itself has no Map overload. + if matches!(resolved_assign_operation, Binding::None) + && matches!(resolved_operation, Binding::None) + { self.emit(AnalysisError::function_not_found( operation, &arg_types, *span, )); } - // Determine the result type of the operation - let result_type = match resolved_operation { - Binding::Resolved(res) => { - if let StaticType::Function { return_type, .. } = - self.scope_tree.get_type(*res) - { - Some(return_type.as_ref().clone()) - } else { - None - } - } - _ => None, - }; - - if let Some(result_type) = result_type { + if !matches!(resolved_operation, Binding::None) { + let result_type = op_return; match l_value { - // Direct variable: widen or reject if annotated Lvalue::Identifier { resolved: Some(target), .. @@ -249,10 +257,9 @@ impl Analyser { )); } } - // Index into a container: widen the container's type Lvalue::Index { value, .. } => { if let Expression::Identifier { - resolved: Binding::Resolved(target), + resolved: Binding::Resolved(Candidate::Scalar(target)), .. 
} = &value.expression { @@ -405,25 +412,11 @@ impl Analyser { Expression::Call { function, arguments, - } => { - let mut type_sig = Vec::with_capacity(arguments.len()); - for a in arguments { - type_sig.push(self.analyse_or_any(a)); - } - - let callee_type = - self.resolve_function_with_argument_types(function, &type_sig, *span); - - let StaticType::Function { return_type, .. } = callee_type else { - if callee_type == StaticType::Any { - return Ok(StaticType::Any); - } - self.emit(AnalysisError::not_callable(&callee_type, *span)); - return Ok(StaticType::Any); - }; - - Ok(*return_type) - } + } => self.analyse_call(function, arguments, CallKind::Regular, *span), + Expression::OperatorCall { + function, + arguments, + } => self.analyse_call(function, arguments, CallKind::Operator, *span), Expression::Tuple { values } => { let mut types = Vec::with_capacity(values.len()); for v in values { @@ -479,56 +472,51 @@ impl Analyser { } } - fn resolve_function_with_argument_types( + /// Analyse a call expression — either `Call` (regular) or `OperatorCall`. + /// Resolves the function binding (with vec dispatch eligible iff + /// `kind == Operator`) and returns the inferred result type. The binding + /// is written back into the function-identifier node and the return type + /// recorded on the call's [`NodeId`] via the surrounding `analyse` wrapper. + fn analyse_call( &mut self, - ident: &mut ExpressionLocation, - argument_types: &[StaticType], + function: &mut ExpressionLocation, + arguments: &mut [ExpressionLocation], + kind: CallKind, span: Span, - ) -> StaticType { - let ExpressionLocation { - expression: Expression::Identifier { name, resolved }, - .. - } = ident - else { - // It's possible that we're not trying to invoke an identifier `foo()` but instead we're - // invoking a value like `get_function()()` so in this case we just continue like normal? 
- return self.analyse_or_any(ident); - }; + ) -> Result { + let mut type_sig = Vec::with_capacity(arguments.len()); + for arg in arguments { + type_sig.push(self.analyse_or_any(arg)); + } - let binding = self - .scope_tree - .resolve_function_binding(name, argument_types); - - let out_type = match &binding { - Binding::None => { - self.emit(AnalysisError::function_not_found( - name, - argument_types, - span, - )); - return StaticType::Any; - } - Binding::Resolved(res) => self.scope_tree.get_type(*res).clone(), - - Binding::Dynamic(_) => { - // Dispatch is decided at runtime, so we have no sound static bound - // on the result. The runtime may pick a declared overload or fall - // through to elementwise (vectorized) dispatch, which can produce - // a value no declared overload returns — treating the LUB of - // declared returns as the result type is unsound and led to issue - // #139, where `let diff = a - b` over tuples was inferred as - // `Number` and a follow-up `diff * diff` then matched the numeric - // overload directly and bypassed dynamic dispatch entirely. - StaticType::Function { - parameters: None, - return_type: Box::new(StaticType::Any), + // Higher-order call shapes like `get_function()()` have a non-identifier + // function position; in that case we just analyse the callee as a value + // and trust the runtime to dispatch. + let Expression::Identifier { name, resolved } = &mut function.expression else { + let callee_type = self.analyse_or_any(function); + return Ok(match callee_type { + StaticType::Function { return_type, .. 
} => *return_type, + StaticType::Any => StaticType::Any, + other => { + self.emit(AnalysisError::not_callable(&other, span)); + StaticType::Any } - } + }); }; - *resolved = binding; + let ResolvedCall { + binding, + return_type, + } = self.scope_tree.resolve_call(name, &type_sig, kind); - out_type + if matches!(binding, Binding::None) { + self.emit(AnalysisError::function_not_found(name, &type_sig, span)); + *resolved = binding; + return Ok(StaticType::Any); + } + + *resolved = binding; + Ok(return_type) } fn resolve_for_iterations( @@ -660,8 +648,18 @@ impl Analyser { let get_args = [type_of_index_target.clone(), index_type.clone()]; let set_args = [type_of_index_target.clone(), index_type, StaticType::Any]; - *resolved_get = Some(self.scope_tree.resolve_function_binding("[]", &get_args)); - *resolved_set = Some(self.scope_tree.resolve_function_binding("[]=", &set_args)); + // Indexing isn't operator-form for vec purposes: there's no + // natural broadcast story for `(list_a, list_b)[i]`. + *resolved_get = Some( + self.scope_tree + .resolve_call("[]", &get_args, CallKind::Regular) + .binding, + ); + *resolved_set = Some( + self.scope_tree + .resolve_call("[]=", &set_args, CallKind::Regular) + .binding, + ); if let Some(t) = type_of_index_target.index_element_type() { Ok(t) diff --git a/ndc_analyser/src/scope.rs b/ndc_analyser/src/scope.rs index 14eecade..5de8a0f6 100644 --- a/ndc_analyser/src/scope.rs +++ b/ndc_analyser/src/scope.rs @@ -1,7 +1,82 @@ use ndc_core::StaticType; -use ndc_parser::{Binding, CaptureSource, ResolvedVar}; +use ndc_parser::{Binding, Candidate, CaptureSource, ResolvedVar}; use std::fmt::{Debug, Formatter}; +/// Whether a call site can fall back to element-wise tuple broadcast when no +/// scalar overload matches the argument types directly. Set by the parser: +/// operator desugars (`a + b`, `-x`, `op=`) produce `Operator`; everything +/// else (`f(x)`, dot calls, indexing) produces `Regular`. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CallKind { + Regular, + Operator, +} + +/// What [`ScopeTree::resolve_call`] hands back: the binding the analyser will +/// store on the call node, plus the inferred return type of the call. +/// +/// Folding the return-type computation into the same walk that produces the +/// binding lets the analyser avoid doing per-position resolution twice on +/// every Dynamic operator-form call. +pub(crate) struct ResolvedCall { + pub binding: Binding, + pub return_type: StaticType, +} + +/// Per-position vec resolution result for an operator-form call. See +/// [`ScopeTree::resolve_vec`] for the case breakdown. +pub(crate) enum VecResolution { + Static { + axis_len: usize, + /// `positions[i]` is the priority-ordered scalar overloads compatible + /// with the call's element types at position `i`. Always non-empty — + /// an empty position cancels the whole vec resolution upstream. + positions: Vec>, + }, + AnyFallback(Vec), +} + +/// Output of a single scalar overload walk across the scope chain. +struct ScalarWalk { + /// First exact-subtype match found; short-circuits the walk. + exact: Option, + /// First-scope-wins loose-compatibility candidates. + loose: Option>, + /// Every same-name callable across the whole chain, for runtime-narrowing + /// fallback when the callee's static type is `Any`. + all_by_name: Vec, +} + +/// If every per-position candidate list contains exactly one entry and they +/// all point to the same scalar overload, return it. This is the only case +/// where `Binding::Resolved(Candidate::Vec)` is safe to emit: a single scalar +/// fires for every element pair. 
+fn unique_scalar(positions: &[Vec]) -> Option { + let first = positions.first()?.first().copied()?; + for pos in positions { + if pos.len() != 1 || pos[0] != first { + return None; + } + } + Some(first) +} + +/// Extend `out` with the result of mapping `vars` through `wrap`, skipping any +/// `Candidate` whose underlying `ResolvedVar` is already present in `out`. +/// Preserves first-seen order — relevant because the runtime probes +/// candidates in list order and stops at the first match. +fn extend_dedup( + out: &mut Vec, + vars: impl IntoIterator, + wrap: fn(ResolvedVar) -> Candidate, +) { + for v in vars { + if !out.iter().any(|c| c.var() == v) { + out.push(wrap(v)); + } + } +} + #[derive(Debug, Clone)] pub(crate) enum TypeBinding { Inferred(StaticType), @@ -347,38 +422,159 @@ impl ScopeTree { } } - /// Resolve a function call binding in a single scope-chain walk. + /// Resolve a function call to a binding and an inferred return type. /// - /// At each scope the priorities are: - /// 1. Exact type match on a local → return `Binding::Resolved` immediately - /// 2. Upvalues with matching name → added to candidates (not early-returned, - /// because a different overload may be an exact match in an outer scope) - /// 3. Compatible-type candidates → remember first set found (for `Binding::Dynamic`) - /// 4. All same-named bindings → accumulate as last-resort fallback - pub(crate) fn resolve_function_binding(&mut self, ident: &str, sig: &[StaticType]) -> Binding { + /// Single entry point for both regular calls (`f(x)`) and operator-form + /// calls (`a + b`, `-x`, `op=`). The two differ only in whether + /// element-wise tuple broadcast is allowed when no scalar overload matches. + /// + /// Resolution order, first match wins: + /// 1. **Scalar exact match** — `is_fn_and_matches` on the args. Returns + /// `Binding::Resolved(Scalar)`, return type = the overload's declared + /// return. + /// 2. 
**Operator-form vec, single scalar across all positions** — the + /// per-position lookup pins exactly one scalar for every element + /// position and no looser scalar competes. Returns + /// `Binding::Resolved(Vec)`, return type = `Tuple`. + /// 3. **Loose scalar or vec candidates** — returns + /// `Binding::Dynamic(candidates)`, return type = LUB of contributions + /// (pure-scalar LUBs declared returns; pure-vec LUBs per position and + /// wraps in `Tuple<…>`; mixed scalar+vec collapses to `Any`). + /// 4. **Same-name callables, no signature match** — last-resort + /// fallback so `Any`-typed callees still get a candidate list. + /// Returns `Binding::Dynamic`, return type = `Any`. + /// 5. Nothing found — `Binding::None`, return type = `Any`. + pub(crate) fn resolve_call( + &mut self, + ident: &str, + sig: &[StaticType], + kind: CallKind, + ) -> ResolvedCall { + let walk = self.scalar_walk(ident, sig); + + // 1. Scalar exact match. + if let Some(exact) = walk.exact { + let return_type = self.scalar_return_type(exact); + return ResolvedCall { + binding: Binding::Resolved(Candidate::Scalar(exact)), + return_type, + }; + } + + // 2. Per-position vec resolution, but only for operator-form calls. + let vec_resolution = match kind { + CallKind::Operator => self.resolve_vec(ident, sig), + CallKind::Regular => None, + }; + + // 2a. If the user clearly *meant* vec dispatch (at least one arg is + // statically a tuple) but a per-position lookup found no overload at + // some position, the call can't succeed at runtime either. Surfacing + // this as `Binding::None` lets the caller emit a precise compile-time + // error instead of letting the program limp on to a runtime miss. + if let Some(VecResolution::Static { positions, .. 
}) = &vec_resolution + && sig + .iter() + .any(|t| matches!(t, StaticType::Tuple(elems) if !elems.is_empty())) + && positions.iter().any(|p| p.is_empty()) + { + return ResolvedCall { + binding: Binding::None, + return_type: StaticType::Any, + }; + } + + let scalar_loose = walk.loose.unwrap_or_default(); + + // Resolved(Vec): vec is the unique candidate (no loose scalars compete) + // and every position pins the same scalar overload. + if scalar_loose.is_empty() + && let Some(VecResolution::Static { + axis_len, + positions, + }) = &vec_resolution + && let Some(scalar) = unique_scalar(positions) + { + let scalar_return = self.scalar_return_type(scalar); + return ResolvedCall { + binding: Binding::Resolved(Candidate::Vec(scalar)), + return_type: StaticType::Tuple(vec![scalar_return; *axis_len]), + }; + } + + // 3. Dynamic: mix scalar loose candidates with vec candidates. + let has_vec_candidates = vec_resolution.is_some(); + if !scalar_loose.is_empty() || has_vec_candidates { + let return_type = self.dynamic_return_type(&scalar_loose, vec_resolution.as_ref()); + let mut combined: Vec = scalar_loose + .iter() + .copied() + .map(Candidate::Scalar) + .collect(); + match vec_resolution { + Some(VecResolution::Static { positions, .. }) => { + extend_dedup(&mut combined, positions.into_iter().flatten(), |v| { + Candidate::Vec(v) + }); + } + Some(VecResolution::AnyFallback(vars)) => { + extend_dedup(&mut combined, vars, Candidate::Vec); + } + None => {} + } + return ResolvedCall { + binding: Binding::Dynamic(combined), + return_type, + }; + } + + // 4. Last-resort same-name callables (for Any-typed callees, upvalues, …). + if !walk.all_by_name.is_empty() { + return ResolvedCall { + binding: Binding::Dynamic( + walk.all_by_name + .into_iter() + .map(Candidate::Scalar) + .collect(), + ), + return_type: StaticType::Any, + }; + } + + // 5. Nothing. 
+ ResolvedCall { + binding: Binding::None, + return_type: StaticType::Any, + } + } + + /// Per-scope walk for scalar matches against `sig`. Exact matches + /// short-circuit; `loose` holds the first scope's compatibility candidates; + /// `all_by_name` accumulates every same-name callable across the chain for + /// the runtime-narrowing fallback. + fn scalar_walk(&mut self, ident: &str, sig: &[StaticType]) -> ScalarWalk { let mut scope_ptr = self.current_scope_idx; let mut env_scopes: Vec = Vec::default(); - let mut loose_candidates: Option> = None; + let mut loose: Option> = None; let mut all_by_name: Vec = Vec::new(); loop { - // 1. Exact match on a local → return immediately if let Some(slot) = self.scopes[scope_ptr].find_function(ident, sig) { - return Binding::Resolved(self.resolve_found_local(ident, slot, &env_scopes)); + return ScalarWalk { + exact: Some(self.resolve_found_local(ident, slot, &env_scopes)), + loose: None, + all_by_name: Vec::new(), + }; } - // 2. Upvalues with matching name — collect as candidates but continue - // walking, because the upvalue may be a different overload (e.g. - // different arity) and the exact match could be in a parent scope. for uv_slot in self.scopes[scope_ptr].find_upvalues_by_name(ident) { all_by_name.push(self.resolve_found_upvalue(ident, uv_slot, &env_scopes)); } - // 3. Compatible candidates (keep only the first scope's matches — shadowing) - if loose_candidates.is_none() { + if loose.is_none() { let candidates = self.scopes[scope_ptr].find_function_candidates(ident, sig); if !candidates.is_empty() { - loose_candidates = Some( + loose = Some( candidates .into_iter() .map(|slot| self.resolve_found_local(ident, slot, &env_scopes)) @@ -387,7 +583,6 @@ impl ScopeTree { } } - // 4. 
All same-named bindings (accumulate across all scopes) let slots = self.scopes[scope_ptr].find_all_callable_slots_by_name(ident); all_by_name.extend( slots @@ -395,22 +590,23 @@ impl ScopeTree { .map(|slot| self.resolve_found_local(ident, slot, &env_scopes)), ); - // Advance to parent scope if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { if self.scopes[scope_ptr].creates_environment { env_scopes.push(scope_ptr); } scope_ptr = parent_idx; } else { - // Fall through to globals if let Some(slot) = self.global_scope.find_function(ident, sig) { - return Binding::Resolved(ResolvedVar::Global { slot }); + return ScalarWalk { + exact: Some(ResolvedVar::Global { slot }), + loose: None, + all_by_name: Vec::new(), + }; } - - if loose_candidates.is_none() { + if loose.is_none() { let candidates = self.global_scope.find_function_candidates(ident, sig); if !candidates.is_empty() { - loose_candidates = Some( + loose = Some( candidates .into_iter() .map(|slot| ResolvedVar::Global { slot }) @@ -418,25 +614,158 @@ impl ScopeTree { ); } } - all_by_name.extend( self.global_scope .find_all_callable_slots_by_name(ident) .into_iter() .map(|slot| ResolvedVar::Global { slot }), ); - break; } } - if let Some(candidates) = loose_candidates { - return Binding::Dynamic(candidates); + ScalarWalk { + exact: None, + loose, + all_by_name, + } + } + + /// Try to resolve a vec candidate set for `(name, sig)`. + /// + /// * `Static` — at least one arg is statically a non-empty tuple and + /// every tuple-shaped arg shares that length. Positions *may* contain + /// empty candidate lists; the caller surfaces that as a call error. + /// * `AnyFallback` — no static tuple shape but at least one arg is `Any`, + /// so vec might still apply at runtime; the runtime narrows per pair. + /// * `None` — no possible vec interpretation. 
+ fn resolve_vec(&mut self, ident: &str, sig: &[StaticType]) -> Option { + if let Some((axis_len, positions)) = self.resolve_vec_static(ident, sig) { + return Some(VecResolution::Static { + axis_len, + positions, + }); } - if !all_by_name.is_empty() { - return Binding::Dynamic(all_by_name); + if !sig.iter().any(|t| matches!(t, StaticType::Any)) { + return None; } - Binding::None + let permissive: Vec = vec![StaticType::Any; sig.len()]; + let vars = self.candidates_for_sig(ident, &permissive); + if vars.is_empty() { + None + } else { + Some(VecResolution::AnyFallback(vars)) + } + } + + fn resolve_vec_static( + &mut self, + ident: &str, + sig: &[StaticType], + ) -> Option<(usize, Vec>)> { + let mut axis: Option = None; + for arg in sig { + if let StaticType::Tuple(elems) = arg { + if elems.is_empty() { + return None; + } + match axis { + None => axis = Some(elems.len()), + Some(n) if n == elems.len() => {} + _ => return None, + } + } + } + let axis_len = axis?; + + let mut positions = Vec::with_capacity(axis_len); + for i in 0..axis_len { + let pos_sig: Vec = sig + .iter() + .map(|arg| match arg { + StaticType::Tuple(elems) => elems[i].clone(), + other => other.clone(), + }) + .collect(); + positions.push(self.candidates_for_sig(ident, &pos_sig)); + } + Some((axis_len, positions)) + } + + /// Priority-ordered scalar candidates for one signature, used by per-position + /// vec resolution. Exact match (if any) first, then loose-compat candidates + /// with the exact entry deduplicated. + fn candidates_for_sig(&mut self, ident: &str, sig: &[StaticType]) -> Vec { + let walk = self.scalar_walk(ident, sig); + let mut out: Vec = Vec::new(); + if let Some(e) = walk.exact { + out.push(e); + } + if let Some(loose) = walk.loose { + for v in loose { + if !out.contains(&v) { + out.push(v); + } + } + } + out + } + + /// Return type of a scalar candidate. Falls back to `Any` if the binding + /// turned out not to carry a function type (e.g. `Any`-typed callee). 
+ fn scalar_return_type(&self, var: ResolvedVar) -> StaticType { + match self.get_type(var) { + StaticType::Function { return_type, .. } => return_type.as_ref().clone(), + _ => StaticType::Any, + } + } + + /// LUB the return types of a list of scalar candidates. Used when every + /// candidate in a Dynamic binding came from the compat-filtered walk and + /// is therefore guaranteed to be a function. + fn lub_scalar_returns(&self, vars: &[ResolvedVar]) -> StaticType { + vars.iter() + .map(|v| self.scalar_return_type(*v)) + .reduce(|a, b| a.lub(&b)) + .unwrap_or(StaticType::Any) + } + + /// Return type for a `Binding::Dynamic`. See [`ResolvedCall`] for the + /// case breakdown. + fn dynamic_return_type( + &self, + scalar_loose: &[ResolvedVar], + vec_resolution: Option<&VecResolution>, + ) -> StaticType { + let has_scalar = !scalar_loose.is_empty(); + + // Pure scalar: LUB the declared returns. This is the precision-recovery + // case PR #140 had to widen to `Any` for soundness; routing it through + // the LUB now is safe because we know none of the candidates can vec. + if has_scalar && vec_resolution.is_none() { + return self.lub_scalar_returns(scalar_loose); + } + + // Pure static vec: per-position LUB → Tuple<…> of element returns. + if !has_scalar + && let Some(VecResolution::Static { + axis_len, + positions, + }) = vec_resolution + { + let elements: Vec = positions + .iter() + .map(|pos| self.lub_scalar_returns(pos)) + .collect(); + debug_assert_eq!(elements.len(), *axis_len); + return StaticType::Tuple(elements); + } + + // Mixed scalar+vec, Any-fallback vec, or empty (defensive): the + // result has no useful upper bound — widen to `Any`. `LUB(scalar, + // Tuple<…>)` collapses to `Any` in our lattice anyway, so there's + // nothing more precise to compute for the mixed case. 
+ StaticType::Any } pub(crate) fn create_local_binding( diff --git a/ndc_bin/src/highlighter.rs b/ndc_bin/src/highlighter.rs index 110d6962..6a7d09b9 100644 --- a/ndc_bin/src/highlighter.rs +++ b/ndc_bin/src/highlighter.rs @@ -123,6 +123,10 @@ fn collect_function_spans(expr: &ExpressionLocation, spans: &mut AHashSet Expression::Call { function, arguments, + } + | Expression::OperatorCall { + function, + arguments, } => { if let Expression::Identifier { .. } = &function.expression { spans.insert(function.span.offset()); diff --git a/ndc_core/src/static_type.rs b/ndc_core/src/static_type.rs index 3e13ed78..8fc82ab2 100644 --- a/ndc_core/src/static_type.rs +++ b/ndc_core/src/static_type.rs @@ -561,14 +561,6 @@ impl StaticType { Self::Tuple(vec![]) } - #[must_use] - pub fn supports_vectorization(&self) -> bool { - match self { - Self::Tuple(values) => values.iter().all(|v| v.is_number()), - _ => false, - } - } - pub fn is_number(&self) -> bool { matches!( self, @@ -576,25 +568,6 @@ impl StaticType { ) } - #[must_use] - pub fn supports_vectorization_with(&self, other: &Self) -> bool { - match (self, other) { - (Self::Tuple(l), Self::Tuple(r)) - if { - l.len() == r.len() - && self.supports_vectorization() - && other.supports_vectorization() - } => - { - true - } - (tup @ Self::Tuple(_), maybe_num) | (maybe_num, tup @ Self::Tuple(_)) => { - tup.supports_vectorization() && maybe_num.is_number() - } - _ => false, - } - } - // BRUH pub fn is_incompatible_with(&self, other: &Self) -> bool { !self.is_subtype(other) && !other.is_subtype(self) diff --git a/ndc_lsp/src/visitor.rs b/ndc_lsp/src/visitor.rs index c87c1e6a..8037d1a6 100644 --- a/ndc_lsp/src/visitor.rs +++ b/ndc_lsp/src/visitor.rs @@ -133,6 +133,10 @@ fn walk_expression(visitor: &mut impl AstVisitor, expr: &ExpressionLocation) { Expression::Call { function, arguments, + } + | Expression::OperatorCall { + function, + arguments, } => { walk_expression(visitor, function); for arg in arguments { diff --git 
a/ndc_parser/src/expression.rs b/ndc_parser/src/expression.rs index 03b4fcae..912bd76e 100644 --- a/ndc_parser/src/expression.rs +++ b/ndc_parser/src/expression.rs @@ -22,8 +22,8 @@ impl NodeId { #[derive(Debug, Eq, PartialEq, Clone)] pub enum Binding { None, - Resolved(ResolvedVar), - Dynamic(Vec), // figure it out at runtime + Resolved(Candidate), + Dynamic(Vec), // figure it out at runtime } #[derive(Debug, Eq, PartialEq, Clone, Copy)] @@ -41,6 +41,29 @@ impl ResolvedVar { } } +/// A function overload candidate the analyser picked. The two variants encode +/// what kind of call this candidate is — a direct scalar call, or an element-wise +/// tuple broadcast over the scalar overload that `var()` returns. +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum Candidate { + Scalar(ResolvedVar), + /// Element-wise tuple broadcast. The inner `ResolvedVar` points to the + /// scalar overload that fires for each element pair. + Vec(ResolvedVar), +} + +impl Candidate { + pub fn var(self) -> ResolvedVar { + match self { + Self::Scalar(v) | Self::Vec(v) => v, + } + } + + pub fn is_vec(self) -> bool { + matches!(self, Self::Vec(_)) + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum CaptureSource { Local(usize), @@ -121,6 +144,14 @@ pub enum Expression { function: Box, arguments: Vec, }, + /// Desugared operator syntax: `a + b`, `-x`, `not x`, etc. Distinguished + /// from `Call` so the analyser can apply tuple-broadcast (vec) dispatch + /// rules without leaking the curated list of operator names downstream. + /// Regular function calls never broadcast. 
+ OperatorCall { + function: Box, + arguments: Vec, + }, Tuple { values: Vec, }, diff --git a/ndc_parser/src/lib.rs b/ndc_parser/src/lib.rs index cae9582f..198b87f4 100644 --- a/ndc_parser/src/lib.rs +++ b/ndc_parser/src/lib.rs @@ -3,7 +3,7 @@ mod operator; mod parser; pub use expression::{ - Binding, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, + Binding, Candidate, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, FunctionParameter, Lvalue, NodeId, ResolvedVar, }; pub use operator::{BinaryOperator, LogicalOperator, UnaryOperator}; diff --git a/ndc_parser/src/operator.rs b/ndc_parser/src/operator.rs index 17b9bbcc..6405c2b3 100644 --- a/ndc_parser/src/operator.rs +++ b/ndc_parser/src/operator.rs @@ -82,22 +82,6 @@ pub enum BinaryOperator { ShiftLeft, } -impl BinaryOperator { - pub fn supports_vectorization(&self) -> bool { - matches!( - self, - Self::Plus - | Self::Minus - | Self::Multiply - | Self::Divide - | Self::FloorDivide - | Self::CModulo - | Self::EuclideanModulo - | Self::Exponent - ) - } -} - #[derive(Debug, Eq, PartialEq, Copy, Clone)] pub enum LogicalOperator { And, diff --git a/ndc_parser/src/parser.rs b/ndc_parser/src/parser.rs index d913a67a..8a298f80 100644 --- a/ndc_parser/src/parser.rs +++ b/ndc_parser/src/parser.rs @@ -198,7 +198,7 @@ impl Parser { // Is this always the same debug_assert_eq!(operator.to_string(), operator_token_loc.token.to_string()); - left = Expression::Call { + left = Expression::OperatorCall { function: Box::new( Expression::Identifier { name: operator_token_loc.token.to_string(), @@ -211,7 +211,7 @@ impl Parser { .to_location(new_span); if let Some(not_token) = invert { - left = Expression::Call { + left = Expression::OperatorCall { function: Box::new( Expression::Identifier { name: not_token.token.to_string(), @@ -243,7 +243,7 @@ impl Parser { let new_span = left.span.merge(right.span); - return Ok(Expression::Call { + return Ok(Expression::OperatorCall { function: Box::new( 
Expression::Identifier { name: operator.to_string(), @@ -500,7 +500,7 @@ impl Parser { let right = self.logic_not()?; let span = right.span; - Ok(Expression::Call { + Ok(Expression::OperatorCall { function: Box::new( Expression::Identifier { name: operator_token_loc.token.to_string(), @@ -631,7 +631,7 @@ impl Parser { let right = self.tight_unary()?; let span = right.span; - Ok(Expression::Call { + Ok(Expression::OperatorCall { function: Box::new( Expression::Identifier { name: operator_token_loc.token.to_string(), diff --git a/ndc_stdlib/src/serde.rs b/ndc_stdlib/src/serde.rs index 2b2783b4..312be94a 100644 --- a/ndc_stdlib/src/serde.rs +++ b/ndc_stdlib/src/serde.rs @@ -72,7 +72,7 @@ fn value_to_json(value: Value) -> Result { .map(|v| value_to_json(v.clone())) .collect::, _>>()?, )), - Object::Function(_) | Object::OverloadSet(_) => { + Object::Function(_) | Object::OverloadSet { .. } => { Err(anyhow::anyhow!("Unable to serialize function")) } }, diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 6fda10f0..bad6e333 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -48,6 +48,12 @@ use std::rc::Rc; pub enum OpCode { /// Pops callee and `n` arguments, pushes result. `[… callee a1…an → … result]` Call(usize), + /// Vec-dispatches the callee on top of the stack across tuple arguments. + /// The callee is a single scalar `Function` (loaded directly, not wrapped + /// in an `OverloadSet`): the analyser pinned it at compile time. + /// `[… callee a1…an → … tuple]` where each `ai` is either a tuple of the + /// shared axis length or a scalar that broadcasts unchanged. + CallVec(usize), /// Pops top of stack. `[… value → …]` Pop, /// Unconditional jump. 
`[…] → […]` @@ -105,6 +111,7 @@ impl std::fmt::Debug for OpCode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Call(n) => write!(f, "Call({n})"), + Self::CallVec(n) => write!(f, "CallVec({n})"), Self::Pop => write!(f, "Pop"), Self::Jump(n) => write!(f, "Jump({n})"), Self::JumpIfTrue(n) => write!(f, "JumpIfTrue({n})"), diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index dddc0aa5..b0624912 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -4,7 +4,7 @@ use crate::{Object, Value}; use ndc_core::{StaticType, TypeSignature}; use ndc_lexer::Span; use ndc_parser::{ - Binding, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, + Binding, Candidate, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, FunctionParameter, LogicalOperator, Lvalue, ResolvedVar, }; use std::rc::Rc; @@ -203,47 +203,65 @@ impl Compiler { .. } => { let var = resolved.expect("lvalue must be resolved"); - if matches!(resolved_assign_operation, Binding::Resolved(_)) { - // In-place operation (e.g. |=, &=) resolved exactly: modifies - // the value's Rc in place via sync_map_mutations in the bridge, - // so all aliases sharing the Rc see the change. We discard the - // unit return value; the variable slot already holds the - // (now-updated) shared reference. - self.compile_binding(resolved_assign_operation, span)?; - self.emit_get_var(var, lv_span); - self.compile_expr(*r_value)?; - self.chunk.write(OpCode::Call(2), span); - self.chunk.write(OpCode::Pop, span); - } else if let Binding::Dynamic(assign_candidates) = - resolved_assign_operation - { - // Assign-op exists but type was unknown at compile time (Any). - // Build a merged overload set: assign-op candidates first so they - // win for map/string/list args, then binary-op candidates as - // fallback for numeric args. 
Assign-ops return lhs so SET_VAR - // stores a meaningful value; sync_map_mutations propagates in-place - // changes to VM Rcs via the bridge. - let binary_candidates = match resolved_operation { - Binding::Dynamic(c) => c, - Binding::Resolved(v) => vec![v], - Binding::None => vec![], - }; - let merged: Vec<_> = assign_candidates - .into_iter() - .chain(binary_candidates) - .collect(); - self.compile_binding(Binding::Dynamic(merged), span)?; - self.emit_get_var(var, lv_span); - self.compile_expr(*r_value)?; - self.chunk.write(OpCode::Call(2), span); - self.emit_set_var(var, lv_span); - } else { - // No exact in-place op: call the regular operation and store result. - self.compile_binding(resolved_operation, span)?; - self.emit_get_var(var, lv_span); - self.compile_expr(*r_value)?; - self.chunk.write(OpCode::Call(2), span); - self.emit_set_var(var, lv_span); + match Self::op_assign_strategy(&resolved_assign_operation) { + OpAssignStrategy::InPlaceScalar => { + // `|=`, `&=`, `++=` over a List/Map/String: + // the in-place op mutates the value's Rc and + // returns unit (or the lhs). The slot already + // points at the shared Rc, so just discard. + self.compile_binding(resolved_assign_operation, span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(OpCode::Call(2), span); + self.chunk.write(OpCode::Pop, span); + } + OpAssignStrategy::DynamicMerge => { + // `op=` exists but either dispatches to + // multiple candidates at runtime, or resolved + // to a single vec candidate that produces a + // fresh tuple. Either way the result must be + // stored back via SetVar — `op=` returns lhs + // when it mutates in place, or a fresh value + // when vec'd. 
+ let (opcode, callee_binding): (OpCode, Binding) = + match resolved_assign_operation { + Binding::Resolved(Candidate::Vec(_)) => { + (OpCode::CallVec(2), resolved_assign_operation) + } + Binding::Dynamic(assign_candidates) => { + // Merge with `op` candidates so the runtime + // dispatcher can fall back to `a op b` shape + // for arg types `op=` doesn't accept. + let mut merged = assign_candidates; + match resolved_operation { + Binding::Dynamic(c) => merged.extend(c), + Binding::Resolved(c) => merged.push(c), + Binding::None => {} + } + (OpCode::Call(2), Binding::Dynamic(merged)) + } + _ => unreachable!( + "DynamicMerge fires only for Resolved(Vec) or Dynamic op=" + ), + }; + self.compile_binding(callee_binding, span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(opcode, span); + self.emit_set_var(var, lv_span); + } + OpAssignStrategy::FallbackToOp => { + // No `op=` overload: lower to `lhs = lhs op rhs`. + // Vec-resolved `op` (e.g. `a += (3, 4)` on + // `Tuple`) goes through `CallVec` + // for the speed-up; everything else is `Call`. 
+ let opcode = Self::call_opcode_for(&resolved_operation, 2); + self.compile_binding(resolved_operation, span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(opcode, span); + self.emit_set_var(var, lv_span); + } } } Lvalue::Index { @@ -252,7 +270,6 @@ impl Compiler { resolved_get, resolved_set, } => { - // let getter = ; let container_span = value.span; let index_span = index.span; @@ -274,6 +291,7 @@ impl Compiler { .write(OpCode::GetLocal(tmp_container), container_span); self.chunk.write(OpCode::GetLocal(tmp_index), index_span); + let op_opcode = Self::call_opcode_for(&resolved_operation, 2); self.compile_binding(resolved_operation, span)?; self.compile_binding( resolved_get.expect("[] must be resolved"), @@ -284,7 +302,7 @@ impl Compiler { self.chunk.write(OpCode::GetLocal(tmp_index), index_span); self.chunk.write(OpCode::Call(2), span); // [](container, index) → current_value self.compile_expr(*r_value)?; - self.chunk.write(OpCode::Call(2), span); // op(current_value, r_value) → new_value + self.chunk.write(op_opcode, span); // op(current_value, r_value) → new_value self.chunk.write(OpCode::Call(3), span); // []=(container, index, new_value) self.chunk.write(OpCode::Pop, span); // discard []= result; common code below pushes unit } @@ -342,17 +360,25 @@ impl Compiler { Expression::Call { function, arguments, + } + | Expression::OperatorCall { + function, + arguments, } => { let function_span = function.span; + let opcode = match &function.expression { + Expression::Identifier { resolved, .. 
} => { + Self::call_opcode_for(resolved, arguments.len()) + } + _ => OpCode::Call(arguments.len()), + }; self.compile_expr(*function)?; - let argument_count = arguments.len(); for argument in arguments { self.compile_expr(argument)?; } - self.chunk - .write(OpCode::Call(argument_count), function_span); + self.chunk.write(opcode, function_span); } Expression::Tuple { values } => { let size = values.len(); @@ -493,14 +519,37 @@ impl Compiler { Ok(()) } + /// Emit code that puts the resolved callee on top of the stack. + /// + /// * `Resolved(Scalar(var))` and `Resolved(Vec(scalar))` both emit a + /// direct `GetVar`; the call-site picks `Call` vs `CallVec` based on + /// which one the analyser chose (see [`Self::call_opcode_for`]). + /// * `Dynamic(candidates)` pushes the candidate list as an `OverloadSet` + /// constant; the VM dispatcher narrows at runtime. fn compile_binding(&mut self, resolved: Binding, span: Span) -> Result<(), CompileError> { match resolved { Binding::None => return Err(CompileError::unresolved_binding(span)), - Binding::Resolved(var) => self.emit_get_var(var, span), + Binding::Resolved(candidate) => self.emit_get_var(candidate.var(), span), Binding::Dynamic(candidates) => { + // Split candidates by kind: scalars walked first (hot path), + // vec candidates only consulted as fallback. Keeps the + // scalar-only call site cost identical to master. 
+ let (scalars, vec_candidates) = + candidates + .into_iter() + .fold((Vec::new(), Vec::new()), |mut acc, c| { + match c { + Candidate::Scalar(v) => acc.0.push(v), + Candidate::Vec(v) => acc.1.push(v), + } + acc + }); let idx = self .chunk - .add_constant(Value::Object(Rc::new(Object::OverloadSet(candidates)))); + .add_constant(Value::Object(Rc::new(Object::OverloadSet { + scalars, + vec_candidates, + }))); self.chunk.write(OpCode::Constant(idx), span); } } @@ -508,6 +557,17 @@ impl Compiler { Ok(()) } + /// Pick the call opcode for a given binding: `CallVec(args)` when the + /// analyser pinned a single vec candidate at compile time, else + /// `Call(args)`. Dynamic bindings always use `Call` and let the VM's + /// `find_overload` route to scalar vs vec dispatch. + fn call_opcode_for(binding: &Binding, args: usize) -> OpCode { + match binding { + Binding::Resolved(Candidate::Vec(_)) => OpCode::CallVec(args), + _ => OpCode::Call(args), + } + } + fn emit_get_var(&mut self, var: ResolvedVar, span: Span) { match var { ResolvedVar::Local { slot } => self.chunk.write(OpCode::GetLocal(slot), span), @@ -843,6 +903,34 @@ struct LoopContext { break_instructions: Vec, } +/// Which lowering shape an `op=` site takes. +enum OpAssignStrategy { + /// A scalar `op=` overload exists and was resolved exactly. The op + /// mutates the value's Rc in place; the result is discarded. + InPlaceScalar, + /// `op=` resolved to multiple candidates. Merge with `op` candidates and + /// dispatch at runtime; store the result back. + DynamicMerge, + /// No `op=` overload — lower to `lhs = lhs op rhs`. + FallbackToOp, +} + +impl Compiler { + fn op_assign_strategy(op_assign: &Binding) -> OpAssignStrategy { + match op_assign { + Binding::Resolved(Candidate::Scalar(_)) => OpAssignStrategy::InPlaceScalar, + // A vec-resolved op= would produce a fresh tuple result that must + // be stored back; the merge path handles that correctly via + // SetVar after the call. 
(In practice the stdlib has no such + // overload, but this keeps the contract uniform.) + Binding::Resolved(Candidate::Vec(_)) | Binding::Dynamic(_) => { + OpAssignStrategy::DynamicMerge + } + Binding::None => OpAssignStrategy::FallbackToOp, + } + } +} + /// Returns the minimum local slot referenced by an lvalue, used to determine /// which upvalues to close at the end of a loop iteration. fn min_lvalue_slot(lv: &Lvalue) -> Option { diff --git a/ndc_vm/src/value/mod.rs b/ndc_vm/src/value/mod.rs index 7491b7e7..4684b641 100644 --- a/ndc_vm/src/value/mod.rs +++ b/ndc_vm/src/value/mod.rs @@ -48,7 +48,16 @@ pub enum Object { default: Option, }, Function(Function), - OverloadSet(Vec), + /// A set of overload candidates the runtime narrows per call. Scalars are + /// walked first (first-match-wins, same shape as the master baseline); + /// `vec_candidates` is only consulted as a fallback for operator-form + /// calls when no scalar accepts the args. `vec_candidates` is empty for + /// non-operator call sites, so the hot path stays a single `Vec` + /// walk identical to scalar-only dispatch. + OverloadSet { + scalars: Vec, + vec_candidates: Vec, + }, Iterator(SharedIterator), Deque(RefCell>), MinHeap(RefCell>>), @@ -488,7 +497,7 @@ impl Object { } } Self::Function(f) => f.static_type(), - Self::OverloadSet(_) => StaticType::Any, + Self::OverloadSet { .. 
} => StaticType::Any, Self::Iterator(_) => StaticType::Iterator(Box::new(StaticType::Any)), Self::Deque(elements) => { let elements = elements.borrow(); @@ -567,7 +576,14 @@ impl fmt::Display for Object { write!(f, "}}") } Self::Function(func) => write!(f, "{func}"), - Self::OverloadSet(slots) => write!(f, "", slots.len()), + Self::OverloadSet { + scalars, + vec_candidates, + } => write!( + f, + "", + scalars.len() + vec_candidates.len() + ), Self::Iterator(iter) => match iter.borrow().len() { Some(n) => write!(f, ""), None => write!(f, ""), @@ -609,7 +625,14 @@ impl fmt::Debug for Object { .field("default", default) .finish(), Self::Function(func) => write!(f, "{func:?}"), - Self::OverloadSet(slots) => f.debug_tuple("OverloadSet").field(slots).finish(), + Self::OverloadSet { + scalars, + vec_candidates, + } => f + .debug_struct("OverloadSet") + .field("scalars", scalars) + .field("vec_candidates", vec_candidates) + .finish(), Self::Iterator(iter) => match iter.borrow().len() { Some(n) => write!(f, ""), None => write!(f, ""), @@ -804,7 +827,7 @@ impl PartialEq for Object { _ => false, } } - (Self::OverloadSet(_), Self::OverloadSet(_)) => { + (Self::OverloadSet { .. }, Self::OverloadSet { .. }) => { panic!("OverloadSet cannot be used as a map key") } (Self::Iterator(a), Self::Iterator(b)) => { @@ -901,7 +924,7 @@ impl Hash for Object { } } } - Self::OverloadSet(_) => { + Self::OverloadSet { .. } => { panic!("OverloadSet cannot be used as a map key") } Self::Iterator(iter) => { diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 6ae2959d..a61daf64 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -263,8 +263,11 @@ impl Vm { e.span.get_or_insert(span); return Err(e); } - } else if let Some(result) = self.try_vectorized_call(args, span)? 
{ - self.stack.push(result); + } else if let Some((scalars, axis_len)) = self.try_vec_dispatch(args) { + if let Err(mut e) = self.dispatch_vec_call(&scalars, args, axis_len, span) { + e.span.get_or_insert(span); + return Err(e); + } } else { let arg_types: Vec<_> = self.stack[self.stack.len() - args..] .iter() @@ -285,6 +288,49 @@ impl Vm { )); } } + OpCode::CallVec(args) => { + let args = *args; + // Compiler emitted a directly-loaded scalar function — no + // OverloadSet probing needed. We still verify it's callable + // because the analyser may have widened its slot to Any + // somewhere along the way. + let scalar = { + let callee = &self.stack[self.stack.len() - args - 1]; + let Value::Object(obj) = callee else { + return Err(VmError::new( + format!("Unable to invoke {} as a function.", callee.static_type()), + span, + )); + }; + let Object::Function(f) = obj.as_ref() else { + return Err(VmError::new( + format!( + "Unable to invoke {} as a function.", + obj.as_ref().static_type() + ), + span, + )); + }; + f.clone() + }; + let axis_len = match vec_axis_len(&self.stack[self.stack.len() - args..]) { + Some(n) => n, + None => { + return Err(VmError::new( + format!( + "vec call '{}' requires tuple arguments of equal non-zero length", + scalar.name().unwrap_or("?") + ), + span, + )); + } + }; + let scalars = [scalar]; + if let Err(mut e) = self.dispatch_vec_call(&scalars, args, axis_len, span) { + e.span.get_or_insert(span); + return Err(e); + } + } OpCode::MakeList(size) => { let size = *size; let data = self.stack.split_off(self.stack.len() - size); @@ -739,21 +785,27 @@ impl Vm { }]; } - /// Resolves the callee on the stack to a concrete `Function`. Returns `Ok(None)` - /// when the callee is an overload set and no candidate matches the argument - /// types — the caller should then try a vectorized fallback. Returns `Err` when - /// the callee is not callable at all. + /// Resolves the callee on the stack to a scalar `Function`. 
Direct + /// `Object::Function` and `OverloadSet` scalars both go through this + /// path — for an `OverloadSet`, the scalar list is walked first-match- + /// wins (no vec dispatch yet). `Ok(None)` means no scalar matched; the + /// caller falls through to vec dispatch. + /// + /// Returns `Option` (not the wider `Callable` enum) so the + /// arm body in the dispatch loop stays the same shape as master — that + /// matters: earlier drafts that widened the return type or split this + /// into two helpers regressed numerics-heavy benches by ~10%. fn resolve_callee(&self, args: usize) -> Result, String> { match &self.stack[self.stack.len() - args - 1] { Value::Object(obj) => match obj.as_ref() { Object::Function(f) => Ok(Some(f.clone())), - Object::OverloadSet(candidates) => { + Object::OverloadSet { scalars, .. } => { let start = self.stack.len() - args; - Ok(self.find_overload(candidates, &self.stack[start..])) + Ok(self.find_scalar_overload(scalars, &self.stack[start..])) } - obj => Err(format!( + other => Err(format!( "Unable to invoke {} as a function.", - obj.static_type() + other.static_type() )), }, callee => Err(format!( @@ -763,16 +815,55 @@ impl Vm { } } + /// Vec-dispatch fallback for `OpCode::Call`. Returns the matched vec + /// scalars and the broadcast axis length when the callee is an + /// `OverloadSet` with at least one vec candidate *and* the args have + /// a consistent tuple axis length (`vec_axis_len`). + /// + /// Returns `None` to mean "vec doesn't apply" — caller errors out. + fn try_vec_dispatch(&self, args: usize) -> Option<(Vec, usize)> { + let Value::Object(obj) = &self.stack[self.stack.len() - args - 1] else { + return None; + }; + let Object::OverloadSet { vec_candidates, .. 
} = obj.as_ref() else { + return None; + }; + if vec_candidates.is_empty() { + return None; + } + let start = self.stack.len() - args; + let axis_len = vec_axis_len(&self.stack[start..])?; + let frame_pointer = self.frames.last().expect("no frame").frame_pointer; + let mut scalars: Vec = Vec::with_capacity(vec_candidates.len()); + for var in vec_candidates { + let value = self.resolve_var(var, frame_pointer); + let Value::Object(obj) = value else { continue }; + let Object::Function(f) = obj.as_ref() else { + continue; + }; + scalars.push(f.clone()); + } + if scalars.is_empty() { + None + } else { + Some((scalars, axis_len)) + } + } + /// Returns the name of the callee at the given stack position, if known. /// For a direct `Function` this is its own name; for an `OverloadSet` it - /// reads the name off the first resolved candidate. + /// reads the name off the first scalar (or vec) candidate. fn callee_name(&self, args: usize) -> Option { let frame_pointer = self.frames.last()?.frame_pointer; match &self.stack[self.stack.len() - args - 1] { Value::Object(obj) => match obj.as_ref() { Object::Function(f) => f.name().map(str::to_string), - Object::OverloadSet(candidates) => candidates.first().and_then(|var| { - let value = self.resolve_var(var, frame_pointer); + Object::OverloadSet { + scalars, + vec_candidates, + } => { + let first = scalars.first().or_else(|| vec_candidates.first())?; + let value = self.resolve_var(first, frame_pointer); let Value::Object(obj) = value else { return None; }; @@ -780,18 +871,20 @@ impl Vm { return None; }; f.name().map(str::to_string) - }), + } _ => None, }, _ => None, } } - /// Searches an overload set for the first candidate whose type signature - /// accepts the given argument types. - fn find_overload(&self, candidates: &[ResolvedVar], args: &[Value]) -> Option { + /// Walks the `OverloadSet`'s scalar candidates in priority order, + /// returning the first whose parameter types accept `args`. 
Mirrors + /// master's `find_overload` body so the hot path on numerics-heavy + /// code stays at the same shape and footprint. + fn find_scalar_overload(&self, scalars: &[ResolvedVar], args: &[Value]) -> Option { let frame_pointer = self.frames.last().expect("no frame").frame_pointer; - candidates.iter().find_map(|var| { + scalars.iter().find_map(|var| { let value = self.resolve_var(var, frame_pointer); let Value::Object(obj) = value else { return None; @@ -803,76 +896,80 @@ impl Vm { }) } - /// Applies a binary operator element-wise over numeric tuples. + /// Broadcast `scalars` across `axis_len` element positions, looking up the + /// matching scalar per pair when more than one is in play. Non-tuple args + /// broadcast unchanged. /// - /// Returns `Some(result_tuple)` when both arguments (or one argument and - /// one scalar) are numeric tuples of compatible shape and an inner function - /// can be found for the element types. Returns `None` when vectorization - /// does not apply. - fn try_vectorized_call(&mut self, args: usize, span: Span) -> Result, VmError> { - if args != 2 { - return Ok(None); - } + /// **Fast path**: when `scalars` has exactly one entry — which is the + /// `CallVec` opcode case and the "pinned single scalar" `Resolved(Vec)` + /// case at runtime — the per-element overload probe is skipped and the + /// scalar is called directly. This is the bulk of the win over the PR's + /// always-probe dispatcher. + fn dispatch_vec_call( + &mut self, + scalars: &[Function], + args: usize, + axis_len: usize, + span: Span, + ) -> Result<(), VmError> { + let arg_start = self.stack.len() - args; + let callee_name = self.callee_name(args); - let callee_idx = self.stack.len() - args - 1; + // Materialise the broadcast arguments up front so the inner + // call_callback can hold &mut self without conflicting with stack + // borrows. Element values clone Rcs only. 
+ let arg_values: Vec = self.stack.split_off(arg_start); + self.stack.pop(); // discard the callee slot - // Use a block to scope all shared borrows of self.stack so they are - // dropped before the &mut self calls (call_callback, truncate) below. - let (inner_fn, pairs) = { - // P5: borrow candidates rather than cloning the Vec. - let candidates: &[ResolvedVar] = match &self.stack[callee_idx] { - Value::Object(obj) => match obj.as_ref() { - Object::OverloadSet(candidates) => candidates, - _ => return Ok(None), - }, - _ => return Ok(None), - }; + let pinned: Option<&Function> = if scalars.len() == 1 { + Some(&scalars[0]) + } else { + None + }; - let left = &self.stack[self.stack.len() - 2]; - let right = &self.stack[self.stack.len() - 1]; - - // Check shape, build a two-element probe for overload lookup, and - // extract the element pairs all in one pass — avoiding the redundant - // as_numeric_tuple calls that a separate vectorization_pairs would do. - let left_tup = as_numeric_tuple(left); - let right_tup = as_numeric_tuple(right); - let (probe, pairs): ([Value; 2], Vec<(Value, Value)>) = match (left_tup, right_tup) { - (Some(ls), Some(rs)) if ls.len() == rs.len() => ( - [ls[0].clone(), rs[0].clone()], - ls.iter().cloned().zip(rs.iter().cloned()).collect(), - ), - (None, Some(rs)) if left.is_number() => ( - [left.clone(), rs[0].clone()], - rs.iter().map(|r| (left.clone(), r.clone())).collect(), - ), - (Some(ls), None) if right.is_number() => ( - [ls[0].clone(), right.clone()], - ls.iter().map(|l| (l.clone(), right.clone())).collect(), - ), - _ => return Ok(None), - }; + let mut elem_args: Vec = Vec::with_capacity(args); + let mut results: Vec = Vec::with_capacity(axis_len); - let Some(inner_fn) = self.find_overload(candidates, &probe) else { - return Ok(None); - }; + for i in 0..axis_len { + elem_args.clear(); + for arg in &arg_values { + elem_args.push(vec_element_at(arg, i)); + } - (inner_fn, pairs) - }; + let scalar: Function = if let Some(f) = pinned { + 
f.clone() + } else { + let Some(found) = scalars.iter().find(|f| f.matches_value_args(&elem_args)) else { + let element_types = elem_args + .iter() + .map(|v| v.static_type().to_string()) + .collect::>() + .join(", "); + let name = callee_name.as_deref().unwrap_or("?"); + return Err(VmError::new( + format!("no overload of '{name}' accepts element {i}: ({element_types})"), + span, + )); + }; + found.clone() + }; - let mut results = Vec::with_capacity(pairs.len()); - for (l, r) in pairs { - let v = self - .call_callback(inner_fn.clone(), vec![l, r]) - .map_err(|mut e| { - e.span.get_or_insert(span); - e - })?; - results.push(v); + let call_args = std::mem::replace(&mut elem_args, Vec::with_capacity(args)); + let result = self.call_callback(scalar, call_args).map_err(|mut e| { + let prefix = match &callee_name { + Some(name) => format!("while vectorising '{name}' at index {i}: "), + None => format!("while vectorising at index {i}: "), + }; + e.message = format!("{prefix}{}", e.message); + e.span.get_or_insert(span); + e + })?; + results.push(result); } - // Replace callee + args on the stack with the result tuple. - self.stack.truncate(callee_idx); - Ok(Some(Value::Object(Rc::new(Object::Tuple(results))))) + self.stack + .push(Value::Object(Rc::new(Object::Tuple(results)))); + Ok(()) } /// Pops a value from the stack and pushes `size` unpacked elements back. @@ -976,21 +1073,38 @@ impl Vm { } } -/// If `value` is a tuple whose elements are all numeric, returns a reference to -/// its element vec. Returns `None` for empty tuples, non-tuples, or tuples -/// that contain non-numeric elements. -fn as_numeric_tuple(value: &Value) -> Option<&Vec> { - let Value::Object(obj) = value else { - return None; - }; - let Object::Tuple(elems) = obj.as_ref() else { - return None; - }; - if !elems.is_empty() && elems.iter().all(|e| e.is_number()) { - Some(elems) - } else { - None +/// Find the broadcast-axis length for a vec call: the shared length of every +/// tuple-shaped argument. 
Empty tuples and length mismatches return `None`, +/// which is interpreted upstream as "vec doesn't apply" and falls through to +/// the regular `no function found` error. +pub(crate) fn vec_axis_len(args: &[Value]) -> Option { + let mut axis: Option = None; + for arg in args { + if let Value::Object(obj) = arg + && let Object::Tuple(elems) = obj.as_ref() + { + if elems.is_empty() { + return None; + } + match axis { + None => axis = Some(elems.len()), + Some(n) if n == elems.len() => {} + _ => return None, + } + } + } + axis +} + +/// Pick the per-element value at vec position `i`. Tuple args contribute +/// `tuple[i]`; non-tuple args broadcast unchanged. +pub(crate) fn vec_element_at(arg: &Value, i: usize) -> Value { + if let Value::Object(obj) = arg + && let Object::Tuple(elems) = obj.as_ref() + { + return elems[i].clone(); } + arg.clone() } impl CallFrame { diff --git a/tests/compiler/Cargo.toml b/tests/compiler/Cargo.toml index 372fee04..6cb28055 100644 --- a/tests/compiler/Cargo.toml +++ b/tests/compiler/Cargo.toml @@ -7,4 +7,5 @@ version.workspace = true ndc_interpreter.workspace = true ndc_lexer.workspace = true ndc_parser.workspace = true +ndc_stdlib.workspace = true ndc_vm.workspace = true diff --git a/tests/compiler/tests/compiler.rs b/tests/compiler/tests/compiler.rs index b3a641ce..2f979ce7 100644 --- a/tests/compiler/tests/compiler.rs +++ b/tests/compiler/tests/compiler.rs @@ -24,6 +24,19 @@ fn compile_with_analysis(input: &str) -> Vec { .to_vec() } +/// Like [`compile_with_analysis`] but also loads the standard library so +/// tests can exercise operator overloads (`+`, `*`, `++`, …) by name. Cheap +/// enough to set up fresh per test. 
+fn compile_with_stdlib(input: &str) -> Vec { + let mut interp = ndc_interpreter::Interpreter::capturing(); + interp.configure(ndc_stdlib::register); + interp + .compile_str(input) + .expect("compile failed") + .opcodes() + .to_vec() +} + // if true { 1 } // // 0: Constant(0) push `true` @@ -338,3 +351,57 @@ fn test_block_scope_cleanup_multiple_locals() { ] ); } + +// (1, 2) + (3, 4) +// +// The analyser pins this to a single scalar `+(Int, Int)` overload broadcast +// across two positions. The compiler emits a direct GetGlobal load of the +// scalar function — no OverloadSet allocation — and a CallVec opcode that +// vec-dispatches at runtime. +#[test] +fn test_vec_call_homogeneous_resolved() { + let ops = compile_with_stdlib("(1, 2) + (3, 4)"); + let call_op = ops + .iter() + .rev() + .find(|op| matches!(op, CallVec(_) | Call(_))); + assert_eq!( + call_op, + Some(&CallVec(2)), + "Resolved(Vec) call should compile to CallVec(2), got: {ops:?}", + ); +} + +// (1, "a") + (2, "b") +// +// Mixed-element tuple over `+`: position 0 resolves to `+(Int, Int)`, but +// position 1 has `(String, String)` and there's no `+(String, String)` +// overload. The analyser surfaces this as Binding::None, which the +// compiler refuses to lower — `compile` must fail. +#[test] +fn test_vec_call_per_position_failure_errors() { + let mut interp = ndc_interpreter::Interpreter::capturing(); + interp.configure(ndc_stdlib::register); + assert!( + interp.compile_str("(1, \"a\") + (2, \"b\")").is_err(), + "mixed-element vec call should fail compilation", + ); +} + +// fn id(x) { x }; id((1, 2, 3)) +// +// Regular function call (not operator syntax) must NEVER compile to CallVec +// even when the argument is a tuple — vec dispatch is gated to operator +// syntax via the `OperatorCall` AST variant. 
+#[test] +fn test_regular_call_with_tuple_arg_does_not_vec() { + let ops = compile_with_stdlib("fn id(x) { x }; id((1, 2, 3))"); + assert!( + ops.iter().all(|op| !matches!(op, CallVec(_))), + "regular call must not lower to CallVec, got: {ops:?}", + ); + assert!( + ops.iter().any(|op| matches!(op, Call(1))), + "expected a Call(1) for id((1, 2, 3)), got: {ops:?}", + ); +} diff --git a/tests/functional/build.rs b/tests/functional/build.rs index 56612338..eedf76ae 100644 --- a/tests/functional/build.rs +++ b/tests/functional/build.rs @@ -24,7 +24,7 @@ fn generate_tests(output: &mut impl Write, base: &Path, dir: &Path) { let path = entry.path(); if path.is_dir() { generate_tests(output, base, &path); - } else if path.extension().map_or(false, |e| e == "ndc") { + } else if path.extension().is_some_and(|e| e == "ndc") { let relative = path.strip_prefix(base).unwrap(); let stem = relative .with_extension("") diff --git a/tests/functional/programs/013_vector_math/003_vector_error2.ndc b/tests/functional/programs/013_vector_math/003_vector_error2.ndc index 49542e40..d8b9546d 100644 --- a/tests/functional/programs/013_vector_math/003_vector_error2.ndc +++ b/tests/functional/programs/013_vector_math/003_vector_error2.ndc @@ -1,3 +1,4 @@ -// expect-error: no function called '+' found matches the arguments -// This looks valid, but isn't +// expect-error: No function called '+' found that matches the arguments +// Per-position vec resolution catches this at compile time: position 2 has +// nested tuples and `+` has no overload that accepts `Tuple` operands. (1,1,(1,)) + (1,1,(1,)) diff --git a/tests/functional/programs/013_vector_math/004_vector_unary.ndc b/tests/functional/programs/013_vector_math/004_vector_unary.ndc new file mode 100644 index 00000000..944f8bba --- /dev/null +++ b/tests/functional/programs/013_vector_math/004_vector_unary.ndc @@ -0,0 +1,9 @@ +// Unary operators broadcast across a tuple. 
+assert_eq(-(1, 2, 3), (-1, -2, -3)); +assert_eq(-(1.5, 2.5), (-1.5, -2.5)); + +// Bitwise negation also vec's. +assert_eq(~(1, 2, 3), (-2, -3, -4)); + +// Boolean negation across a tuple of bools. +assert_eq(!(true, false, true), (false, true, false)); diff --git a/tests/functional/programs/013_vector_math/005_vector_non_numeric.ndc b/tests/functional/programs/013_vector_math/005_vector_non_numeric.ndc new file mode 100644 index 00000000..0e26637f --- /dev/null +++ b/tests/functional/programs/013_vector_math/005_vector_non_numeric.ndc @@ -0,0 +1,7 @@ +// Non-numeric vec: `++` over tuples of strings dispatches per-element. +assert_eq(("a", "b") ++ ("c", "d"), ("ac", "bd")); +assert_eq(("hello", "foo") ++ (" world", "bar"), ("hello world", "foobar")); + +// Vec over tuples of lists. +assert_eq(([1], [2]) ++ ([3], [4]), ([1, 3], [2, 4])); +assert_eq(([1, 2], []) ++ ([3], [4, 5]), ([1, 2, 3], [4, 5])); diff --git a/tests/functional/programs/013_vector_math/006_vector_mixed_elements.ndc b/tests/functional/programs/013_vector_math/006_vector_mixed_elements.ndc new file mode 100644 index 00000000..89f5649d --- /dev/null +++ b/tests/functional/programs/013_vector_math/006_vector_mixed_elements.ndc @@ -0,0 +1,6 @@ +// expect-error: No function called '+' found that matches the arguments +// Per-position resolution catches this at compile time: position 1 has +// element types `(String, String)` and `+` has no overload accepting +// strings. Position 0 would resolve to `+(Int, Int)`, but one bad position +// fails the whole call. +(1, "a") + (2, "b") diff --git a/tests/functional/programs/013_vector_math/007_regular_call_no_vec.ndc b/tests/functional/programs/013_vector_math/007_regular_call_no_vec.ndc new file mode 100644 index 00000000..dc2599a0 --- /dev/null +++ b/tests/functional/programs/013_vector_math/007_regular_call_no_vec.ndc @@ -0,0 +1,9 @@ +// A regular function call must NOT vec over its tuple argument. 
+// `id((1, 2, 3))` returns the tuple verbatim; vec dispatch is reserved +// for operator-form calls only. +fn id(x) { x }; +assert_eq(id((1, 2, 3)), (1, 2, 3)); + +// Same shape but the argument is a tuple of tuples — proves the call +// shape isn't being unwrapped. +assert_eq(id(((1, 2), (3, 4))), ((1, 2), (3, 4))); diff --git a/tests/functional/programs/013_vector_math/008_vector_chain_precision.ndc b/tests/functional/programs/013_vector_math/008_vector_chain_precision.ndc new file mode 100644 index 00000000..9faa3d80 --- /dev/null +++ b/tests/functional/programs/013_vector_math/008_vector_chain_precision.ndc @@ -0,0 +1,13 @@ +// The analyser used to widen any operator call to `Any` after PR #140 to +// keep dispatch sound. With per-binding vec tracking we can pin the +// result type, so chained operator calls keep dispatching to the +// precise scalar overload. +let v = (1, 2) + (3, 4); +let w = v + (10, 20); +let x = w * (2, 1); +assert_eq(x, (28, 26)); + +// Same chain mixing scalar broadcast with full tuple form. +let a = (1, 2) + 10; +let b = a * (3, 4); +assert_eq(b, (33, 48)); diff --git a/tests/functional/programs/013_vector_math/009_vector_exact_match_precision.ndc b/tests/functional/programs/013_vector_math/009_vector_exact_match_precision.ndc new file mode 100644 index 00000000..d9b003cd --- /dev/null +++ b/tests/functional/programs/013_vector_math/009_vector_exact_match_precision.ndc @@ -0,0 +1,25 @@ +// Vec dispatch should pick the most specific scalar overload by subtype +// (mirroring scalar dispatch's `find_function` precedence), not collapse +// to the LUB of every compatible overload. `Tuple - Tuple` must infer as `Tuple`, not `Tuple`. +let a: Tuple = (1, 2); +let b: Tuple = (3, 4); +let c: Tuple = a - b; +let d: Tuple = c * c; +assert_eq(d, (4, 4)); + +// Chained: `+` keeps the precise element type through several operators. 
+let e: Tuple = a + b; +let f: Tuple = e + (10, 20); +assert_eq(f, (14, 26)); + +// Any args fall to LUB (no scalar `-(Any, Any)` exists, so vec dispatch +// can't pin a single overload at compile time): `Tuple - +// Tuple` infers as `Tuple`, the LUB across +// every numeric overload's return type. +let l: List = [1, 2]; +let p: Tuple = (l.first, l.first); +let q: Tuple = (l.last, l.last); +let r: Tuple = p - q; +let s: Tuple = r * r; +assert_eq(s, (1, 1)); diff --git a/tests/functional/programs/013_vector_math/010_vector_op_assignment.ndc b/tests/functional/programs/013_vector_math/010_vector_op_assignment.ndc new file mode 100644 index 00000000..88881cd5 --- /dev/null +++ b/tests/functional/programs/013_vector_math/010_vector_op_assignment.ndc @@ -0,0 +1,23 @@ +// Compound assignment must use the vec return type, not the underlying +// scalar's return. `a += (3, 4)` on a `Tuple` lvalue must +// widen-check against `Tuple`, not `Int`. +let a: Tuple = (1, 2); +a += (3, 4); +assert_eq(a, (4, 6)); + +let b: Tuple = (10, 20); +b -= (1, 2); +assert_eq(b, (9, 18)); + +// Scalar broadcast on the right-hand side. +let c: Tuple = (3, 4); +c *= 2; +assert_eq(c, (6, 8)); + +// Without an annotation the inferred lvalue type must also widen +// correctly. `d` starts as `Tuple` and a follow-up read +// must still see the precise tuple element types. 
+let d = (1, 2); +d += (10, 20); +let e: Tuple = d + (100, 200); +assert_eq(e, (111, 222)); diff --git a/tests/functional/programs/013_vector_math/011_vector_op_assign_aliasing.ndc b/tests/functional/programs/013_vector_math/011_vector_op_assign_aliasing.ndc new file mode 100644 index 00000000..e53757ba --- /dev/null +++ b/tests/functional/programs/013_vector_math/011_vector_op_assign_aliasing.ndc @@ -0,0 +1,16 @@ +// Vec-resolved op= must preserve the in-place aliasing contract that +// scalar op= guarantees: inner Rc mutations stay visible through every +// alias of the originals, and the variable itself still reads as the +// updated value. +let l1 = [1, 2]; +let l2 = [3, 4]; +let pair = (l1, l2); +let outer_alias = pair; +pair ++= ([5], [6]); +// Inner-list mutation visible through the original bindings. +assert_eq(l1, [1, 2, 5]); +assert_eq(l2, [3, 4, 6]); +// Outer alias still sees the same inner lists, now mutated. +assert_eq(outer_alias, ([1, 2, 5], [3, 4, 6])); +// pair itself reads as the updated value. +assert_eq(pair, ([1, 2, 5], [3, 4, 6])); diff --git a/tests/functional/programs/013_vector_math/012_vector_heterogeneous.ndc b/tests/functional/programs/013_vector_math/012_vector_heterogeneous.ndc new file mode 100644 index 00000000..bb9efd17 --- /dev/null +++ b/tests/functional/programs/013_vector_math/012_vector_heterogeneous.ndc @@ -0,0 +1,19 @@ +// Per-position vec dispatch: positions can resolve to different scalar +// overloads. Element 0 dispatches `++(List, List)`; element 1 dispatches +// `++(String, String)`. The result tuple has different element types per +// position. +assert_eq( + ([1, 2, 3], "foo") ++ ([4, 5, 6], "bar"), + ([1, 2, 3, 4, 5, 6], "foobar") +); + +// Same shape, longer axis with three distinct overloads per position. +assert_eq( + ([1], "a", [10]) ++ ([2], "b", [20]), + ([1, 2], "ab", [10, 20]) +); + +// Mixed numerics: position 0 stays Int; position 1 needs Number coercion +// because of the Float on the left. 
+let mixed = (1, 1.5) + (2, 3); +assert_eq(mixed, (3, 4.5)); diff --git a/tests/functional/programs/013_vector_math/013_vector_per_position_no_overload.ndc b/tests/functional/programs/013_vector_math/013_vector_per_position_no_overload.ndc new file mode 100644 index 00000000..85bac436 --- /dev/null +++ b/tests/functional/programs/013_vector_math/013_vector_per_position_no_overload.ndc @@ -0,0 +1,5 @@ +// expect-error: No function called '+' found that matches the arguments +// Position 1 has element types `(Bool, Bool)` and `+` has no overload +// that accepts booleans. The per-position lookup catches this at compile +// time even though position 0's `(Int, Int)` would resolve cleanly. +let r = (1, true) + (2, false); diff --git a/tests/functional/programs/900_bugs/bug0021_combinations_lazy_source.ndc b/tests/functional/programs/900_bugs/bug0022_combinations_lazy_source.ndc similarity index 100% rename from tests/functional/programs/900_bugs/bug0021_combinations_lazy_source.ndc rename to tests/functional/programs/900_bugs/bug0022_combinations_lazy_source.ndc diff --git a/tests/functional/programs/900_bugs/bug0023_incompat_dynamic_misinferred.ndc b/tests/functional/programs/900_bugs/bug0023_incompat_dynamic_misinferred.ndc new file mode 100644 index 00000000..7d9b7363 --- /dev/null +++ b/tests/functional/programs/900_bugs/bug0023_incompat_dynamic_misinferred.ndc @@ -0,0 +1,9 @@ +// expect-error: mismatched types: found Any but expected String +// `returns_int(42)` has no compatible overload — `returns_int` only takes +// a `String`. Before this fix, the analyser LUB'd every candidate's +// declared return regardless of compatibility, so the call inferred as +// `Int` and the assignment errored with the misleading "found Int but +// expected String". Dynamic with all-by-name fallback now widens to +// `Any`, so the mismatch surfaces against `Any` instead. 
+fn returns_int(s: String) -> Int { 1 }; +let x: String = returns_int(42); From 14379d55a29ba1ee3bf6674ad8651233724bdd7a Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Sun, 24 May 2026 11:14:39 +0200 Subject: [PATCH 2/3] =?UTF-8?q?fix(analyser):=20keep=20both=20scalar=20and?= =?UTF-8?q?=20vec=20candidates=20when=20a=20slot=20has=20both=20?= =?UTF-8?q?=F0=9F=94=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `extend_dedup` was comparing `Candidate`s by inner `ResolvedVar`, so vec candidates were stripped from the Dynamic binding's candidate list when their scalar twin (same slot) had already been added. The compiler then emitted an `OverloadSet` with no `vec_candidates`, and any call where both args were statically `Any` but turned out to be tuples at runtime (e.g. `a - b` where `a, b` were produced by `combinations(2)`-style destructuring) fell through to the "no function found" error. Also speed up runtime vec dispatch for the heterogeneous-element case: * `Vm::dispatch_vec_call_dynamic` resolves vec candidates lazily from `&[ResolvedVar]` instead of materialising a `Vec` up front on every outer call — matches master's `try_vectorized_call` pattern. * Both vec dispatchers now cache the last-matched scalar across positions, so homogeneous tuples (the common shape, including the AoC 2025/08 hot loop) pay one candidate probe per outer call. Brings the AoC 2025/08 part1 regression from +22% to +8% vs master while keeping every other bench at parity or better. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ndc_analyser/src/scope.rs | 14 +- ndc_vm/src/vm.rs | 165 +++++++++++++----- .../014_vector_any_typed_args.ndc | 13 ++ 3 files changed, 145 insertions(+), 47 deletions(-) create mode 100644 tests/functional/programs/013_vector_math/014_vector_any_typed_args.ndc diff --git a/ndc_analyser/src/scope.rs b/ndc_analyser/src/scope.rs index 5de8a0f6..f3ff3b73 100644 --- a/ndc_analyser/src/scope.rs +++ b/ndc_analyser/src/scope.rs @@ -61,18 +61,20 @@ fn unique_scalar(positions: &[Vec]) -> Option { Some(first) } -/// Extend `out` with the result of mapping `vars` through `wrap`, skipping any -/// `Candidate` whose underlying `ResolvedVar` is already present in `out`. -/// Preserves first-seen order — relevant because the runtime probes -/// candidates in list order and stops at the first match. +/// Extend `out` with the result of mapping `vars` through `wrap`, skipping +/// any entry equal to one already present. Compares by full `Candidate` +/// (variant + var), so the same `ResolvedVar` can appear once as +/// `Scalar` and once as `Vec` — they are distinct dispatch shapes that +/// must each reach the runtime. Preserves first-seen order. 
fn extend_dedup( out: &mut Vec, vars: impl IntoIterator, wrap: fn(ResolvedVar) -> Candidate, ) { for v in vars { - if !out.iter().any(|c| c.var() == v) { - out.push(wrap(v)); + let candidate = wrap(v); + if !out.contains(&candidate) { + out.push(candidate); } } } diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index a61daf64..2ea6a8f8 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -263,8 +263,10 @@ impl Vm { e.span.get_or_insert(span); return Err(e); } - } else if let Some((scalars, axis_len)) = self.try_vec_dispatch(args) { - if let Err(mut e) = self.dispatch_vec_call(&scalars, args, axis_len, span) { + } else if let Some((overload_set, axis_len)) = self.try_vec_dispatch(args) { + if let Err(mut e) = + self.dispatch_vec_call_dynamic(&overload_set, args, axis_len, span) + { e.span.get_or_insert(span); return Err(e); } @@ -815,38 +817,22 @@ impl Vm { } } - /// Vec-dispatch fallback for `OpCode::Call`. Returns the matched vec - /// scalars and the broadcast axis length when the callee is an - /// `OverloadSet` with at least one vec candidate *and* the args have - /// a consistent tuple axis length (`vec_axis_len`). - /// - /// Returns `None` to mean "vec doesn't apply" — caller errors out. - fn try_vec_dispatch(&self, args: usize) -> Option<(Vec, usize)> { + /// Probes `OpCode::Call`'s callee for vec dispatch eligibility without + /// allocating: returns `(vec_candidates_vars, axis_len)` borrowed off the + /// stack when vec applies. `dispatch_vec_call_dynamic` resolves the + /// candidates lazily — avoids the `Vec` allocation per call + /// that the eager-resolve version cost on AoC-style hot loops. + fn try_vec_dispatch(&self, args: usize) -> Option<(Rc, usize)> { let Value::Object(obj) = &self.stack[self.stack.len() - args - 1] else { return None; }; - let Object::OverloadSet { vec_candidates, .. 
} = obj.as_ref() else { - return None; - }; - if vec_candidates.is_empty() { - return None; - } - let start = self.stack.len() - args; - let axis_len = vec_axis_len(&self.stack[start..])?; - let frame_pointer = self.frames.last().expect("no frame").frame_pointer; - let mut scalars: Vec = Vec::with_capacity(vec_candidates.len()); - for var in vec_candidates { - let value = self.resolve_var(var, frame_pointer); - let Value::Object(obj) = value else { continue }; - let Object::Function(f) = obj.as_ref() else { - continue; - }; - scalars.push(f.clone()); - } - if scalars.is_empty() { - None - } else { - Some((scalars, axis_len)) + match obj.as_ref() { + Object::OverloadSet { vec_candidates, .. } if !vec_candidates.is_empty() => { + let start = self.stack.len() - args; + let axis_len = vec_axis_len(&self.stack[start..])?; + Some((Rc::clone(obj), axis_len)) + } + _ => None, } } @@ -896,15 +882,98 @@ impl Vm { }) } - /// Broadcast `scalars` across `axis_len` element positions, looking up the - /// matching scalar per pair when more than one is in play. Non-tuple args - /// broadcast unchanged. - /// - /// **Fast path**: when `scalars` has exactly one entry — which is the - /// `CallVec` opcode case and the "pinned single scalar" `Resolved(Vec)` - /// case at runtime — the per-element overload probe is skipped and the - /// scalar is called directly. This is the bulk of the win over the PR's - /// always-probe dispatcher. + /// Vec dispatch when `vec_candidates` lives behind a shared `Object::OverloadSet` + /// Rc — the runtime-narrowing path for `Binding::Dynamic` operator calls. + /// Resolves the candidate vars to `Function`s lazily inside the broadcast + /// loop with a last-match cache, so homogeneous tuples (the common case, + /// including the `AoC` 2025/08 hot loop) pay one resolve per outer call + /// instead of N. No upfront `Vec` allocation. 
+ fn dispatch_vec_call_dynamic( + &mut self, + overload_set: &Rc, + args: usize, + axis_len: usize, + span: Span, + ) -> Result<(), VmError> { + let Object::OverloadSet { vec_candidates, .. } = overload_set.as_ref() else { + unreachable!("dispatch_vec_call_dynamic invoked with non-OverloadSet callee"); + }; + debug_assert!(!vec_candidates.is_empty()); + + let arg_start = self.stack.len() - args; + let callee_name = self.callee_name(args); + + let arg_values: Vec = self.stack.split_off(arg_start); + self.stack.pop(); // discard the callee slot + + let frame_pointer = self.frames.last().expect("no frame").frame_pointer; + + let mut elem_args: Vec = Vec::with_capacity(args); + let mut results: Vec = Vec::with_capacity(axis_len); + // Cached last-match Function. Homogeneous tuples reuse this across + // positions; heterogeneous tuples fall through to the candidate walk. + let mut last_match: Option = None; + + for i in 0..axis_len { + elem_args.clear(); + for arg in &arg_values { + elem_args.push(vec_element_at(arg, i)); + } + + let scalar: Function = if let Some(f) = &last_match + && f.matches_value_args(&elem_args) + { + f.clone() + } else { + let mut found: Option = None; + for var in vec_candidates { + let value = self.resolve_var(var, frame_pointer); + let Value::Object(obj) = value else { continue }; + let Object::Function(f) = obj.as_ref() else { + continue; + }; + if f.matches_value_args(&elem_args) { + found = Some(f.clone()); + break; + } + } + let Some(f) = found else { + let element_types = elem_args + .iter() + .map(|v| v.static_type().to_string()) + .collect::>() + .join(", "); + let name = callee_name.as_deref().unwrap_or("?"); + return Err(VmError::new( + format!("no overload of '{name}' accepts element {i}: ({element_types})"), + span, + )); + }; + last_match = Some(f.clone()); + f + }; + + let call_args = std::mem::replace(&mut elem_args, Vec::with_capacity(args)); + let result = self.call_callback(scalar, call_args).map_err(|mut e| { + let prefix = 
match &callee_name { + Some(name) => format!("while vectorising '{name}' at index {i}: "), + None => format!("while vectorising at index {i}: "), + }; + e.message = format!("{prefix}{}", e.message); + e.span.get_or_insert(span); + e + })?; + results.push(result); + } + + self.stack + .push(Value::Object(Rc::new(Object::Tuple(results)))); + Ok(()) + } + + /// Vec dispatch when the scalar set is already resolved — the `CallVec` + /// opcode path. `scalars.len() == 1` is the analyser-pinned fast path + /// that skips the per-element probe entirely. fn dispatch_vec_call( &mut self, scalars: &[Function], @@ -929,6 +998,11 @@ impl Vm { let mut elem_args: Vec = Vec::with_capacity(args); let mut results: Vec = Vec::with_capacity(axis_len); + // Cache the last scalar that matched. Homogeneous tuples — the common + // case at runtime, including the AoC 2025/08 hot loop — all want the + // same scalar at every position, so probing it first short-circuits + // the candidate walk for positions 1..N. 
+ let mut last_match: Option = None; for i in 0..axis_len { elem_args.clear(); @@ -938,8 +1012,16 @@ impl Vm { let scalar: Function = if let Some(f) = pinned { f.clone() + } else if let Some(idx) = last_match + && scalars[idx].matches_value_args(&elem_args) + { + scalars[idx].clone() } else { - let Some(found) = scalars.iter().find(|f| f.matches_value_args(&elem_args)) else { + let Some((idx, found)) = scalars + .iter() + .enumerate() + .find(|(_, f)| f.matches_value_args(&elem_args)) + else { let element_types = elem_args .iter() .map(|v| v.static_type().to_string()) @@ -951,6 +1033,7 @@ impl Vm { span, )); }; + last_match = Some(idx); found.clone() }; diff --git a/tests/functional/programs/013_vector_math/014_vector_any_typed_args.ndc b/tests/functional/programs/013_vector_math/014_vector_any_typed_args.ndc new file mode 100644 index 00000000..bbd4b1f8 --- /dev/null +++ b/tests/functional/programs/013_vector_math/014_vector_any_typed_args.ndc @@ -0,0 +1,13 @@ +// Regression: when both args are statically `Any` but happen to be +// numeric tuples at runtime, the Dynamic binding must carry both the +// scalar overloads (in case the values turn out scalar) AND the vec +// candidates pointing at those same scalar slots. An earlier draft +// deduped vec candidates against their scalar twins by inner var, +// which left the OverloadSet with no vec_candidates and the call +// errored at runtime with "no function called '-' found". +fn id(x) { x }; +let a = id((1, 2, 3)); // statically: Any. runtime: Tuple. 
+let b = id((4, 5, 6)); +let diff = a - b; +assert_eq(diff, (-3, -3, -3)); +assert_eq(diff * diff, (9, 9, 9)); From e3a4f0829f3df2c04d525e17dec48f8fdfa20f71 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Sun, 24 May 2026 11:49:59 +0200 Subject: [PATCH 3/3] =?UTF-8?q?docs(vm,analyser):=20drop=20external=20repo?= =?UTF-8?q?=20references=20and=20simplify=20comments=20=F0=9F=93=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on PR #146 from @timfennis: * scrub all mentions of an external AoC repo (`benches/programs/vec_hot_loop.ndc`, three comments in `ndc_vm/src/vm.rs`) — those references don't belong here * simplify the OpAssignment "both `op=` and `op`" comment in `ndc_analyser/src/analyser.rs` — drop the jargon, keep the rationale * tighten the `analyse_call` doc comment — say what it does, skip the side-table mechanics that the caller already documents No behaviour change. Co-Authored-By: Claude Opus 4.7 (1M context) --- benches/programs/vec_hot_loop.ndc | 6 ++---- ndc_analyser/src/analyser.rs | 14 ++++---------- ndc_vm/src/vm.rs | 21 +++++++++------------ 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/benches/programs/vec_hot_loop.ndc b/benches/programs/vec_hot_loop.ndc index 5625505d..59767c4b 100644 --- a/benches/programs/vec_hot_loop.ndc +++ b/benches/programs/vec_hot_loop.ndc @@ -1,7 +1,5 @@ -// Vec dispatch hot loop. Mirrors the per-iteration vec calls that drove -// the AoC 2025/08 regression in PR #141: a tight loop over millions of -// `Tuple + Tuple` calls. The CallVec fast path -// should bring this close to the master baseline. +// Vec dispatch hot loop: a tight loop over many +// `Tuple + Tuple` calls. 
let n = 200_000; let acc = (0, 0); for i in 0..n { diff --git a/ndc_analyser/src/analyser.rs b/ndc_analyser/src/analyser.rs index 30120fa2..3f41ac16 100644 --- a/ndc_analyser/src/analyser.rs +++ b/ndc_analyser/src/analyser.rs @@ -202,11 +202,8 @@ impl Analyser { let right_type = self.analyse_or_any(r_value); let arg_types = vec![left_type, right_type]; - // Both `op=` and `op` desugar from operator syntax, so vec - // dispatch is available for either path. Resolving `op` gives - // us the result type to widen the lvalue with — that's how - // `a += (3, 4)` on `Tuple` widens correctly instead - // of trying to widen with the scalar `+(Int,Int) -> Int`. + // Resolve both `op=` and `op` so we can widen the lvalue + // by the result type of whichever one actually fires. let ResolvedCall { binding: assign_binding, .. @@ -472,11 +469,8 @@ impl Analyser { } } - /// Analyse a call expression — either `Call` (regular) or `OperatorCall`. - /// Resolves the function binding (with vec dispatch eligible iff - /// `kind == Operator`) and returns the inferred result type. The binding - /// is written back into the function-identifier node and the return type - /// recorded on the call's [`NodeId`] via the surrounding `analyse` wrapper. + /// Resolves a call (regular or operator-form) and returns its result type. + /// Only operator-form calls are eligible for vec dispatch. fn analyse_call( &mut self, function: &mut ExpressionLocation, diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 2ea6a8f8..4ad660dd 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -818,10 +818,9 @@ impl Vm { } /// Probes `OpCode::Call`'s callee for vec dispatch eligibility without - /// allocating: returns `(vec_candidates_vars, axis_len)` borrowed off the - /// stack when vec applies. `dispatch_vec_call_dynamic` resolves the - /// candidates lazily — avoids the `Vec` allocation per call - /// that the eager-resolve version cost on AoC-style hot loops. 
+ /// allocating: returns the `OverloadSet` Rc and broadcast axis length + /// when vec applies. `dispatch_vec_call_dynamic` resolves the candidates + /// lazily from there, avoiding a per-call `Vec` allocation. fn try_vec_dispatch(&self, args: usize) -> Option<(Rc, usize)> { let Value::Object(obj) = &self.stack[self.stack.len() - args - 1] else { return None; @@ -882,11 +881,10 @@ impl Vm { }) } - /// Vec dispatch when `vec_candidates` lives behind a shared `Object::OverloadSet` - /// Rc — the runtime-narrowing path for `Binding::Dynamic` operator calls. - /// Resolves the candidate vars to `Function`s lazily inside the broadcast - /// loop with a last-match cache, so homogeneous tuples (the common case, - /// including the `AoC` 2025/08 hot loop) pay one resolve per outer call + /// Vec dispatch for `Binding::Dynamic` operator calls — the runtime + /// picks the matching scalar overload per element position. Resolves + /// candidates lazily from `vec_candidates` with a last-match cache, so + /// homogeneous tuples (the common case) pay one resolve per outer call /// instead of N. No upfront `Vec` allocation. fn dispatch_vec_call_dynamic( &mut self, @@ -999,9 +997,8 @@ impl Vm { let mut elem_args: Vec = Vec::with_capacity(args); let mut results: Vec = Vec::with_capacity(axis_len); // Cache the last scalar that matched. Homogeneous tuples — the common - // case at runtime, including the AoC 2025/08 hot loop — all want the - // same scalar at every position, so probing it first short-circuits - // the candidate walk for positions 1..N. + // case — all want the same scalar at every position, so probing it + // first short-circuits the candidate walk for positions 1..N. let mut last_match: Option = None; for i in 0..axis_len {