diff --git a/Cargo.lock b/Cargo.lock index 7de24578..1d573036 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1121,6 +1121,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -1728,6 +1730,7 @@ dependencies = [ "bytes", "dashmap", "getrandom 0.3.4", + "hashbrown 0.15.5", "indexmap", "iroh", "iroh-base", diff --git a/Cargo.toml b/Cargo.toml index 58b90be3..e83b8384 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ anyhow = "1" blake3 = "1.8.4" itertools = "0.14.0" indexmap = { version = "2", features = ["rayon"] } +hashbrown = "0.15" lean-ffi = { git = "https://github.com/argumentcomputer/lean-ffi", rev = "cc98ebf67bf453ac3827cb767f78b13ea674dd6a" } mimalloc = { version = "0.1", default-features = false } multi-stark = { git = "https://github.com/argumentcomputer/multi-stark.git", rev = "9ecab51d553445c0cc7b571af00a76b8a83a6f8c" } diff --git a/Ix/Cli/RefsOfCmd.lean b/Ix/Cli/RefsOfCmd.lean new file mode 100644 index 00000000..c2824903 --- /dev/null +++ b/Ix/Cli/RefsOfCmd.lean @@ -0,0 +1,70 @@ +/- + `ix refs-of --ixe `: print the direct constant + references of a constant — its `Constant.refs` entries that resolve to + constants in the env (literal blobs are skipped: they are pinned by + content-addressing and are not well-typedness obligations). + + Prints a comma-separated list of 64-char hex addresses on stdout, the + exact shape `ix tree canonical` expects, so a frontier assumption tree + for a single constant is: + + ix tree canonical $(ix refs-of --ixe env.ixe) +-/ +module +public import Cli +public import Ix.Address +public import Ix.Common +public import Ix.Ixon + +public section + +namespace Ix.Cli.RefsOfCmd + +def parseName (arg : String) : Lean.Name := + arg.splitOn "." |>.foldl (init := .anonymous) + fun acc s => match s.toNat? with + | some n => Lean.Name.mkNum acc n + | none => Lean.Name.mkStr acc s + +def runRefsOfCmd (p : Cli.Parsed) : IO UInt32 := do + let some nameArg := p.positionalArg? "name" + | p.printError "error: must specify "; return 1 + let name := parseName (nameArg.as! String) + let some ixeFlag := p.flag? "ixe" + | p.printError "error: --ixe is required"; return 1 + let path := ixeFlag.as! String + let bytes ← IO.FS.readBinFile path + let ixonEnv ← match Ixon.rsDeEnv bytes with + | .error e => + IO.eprintln s!"error: failed to deserialize {path}: {e}"; return 1 + | .ok env => pure env + let some addr := ixonEnv.getAddr? (Ix.Name.fromLeanName name) + | IO.eprintln s!"error: {name} not found in {path}"; return 1 + let some c := ixonEnv.getConst? addr + | IO.eprintln s!"error: constant for {name} missing from env"; return 1 + let mut seen : Std.HashSet String := {} + let mut out : Array String := #[] + for r in c.refs do + if (ixonEnv.getConst? r).isSome then + let s := toString r + if !seen.contains s then + seen := seen.insert s + out := out.push s + IO.println (String.intercalate "," out.toList) + return 0 + +end Ix.Cli.RefsOfCmd + +open Ix.Cli.RefsOfCmd in +def refsOfCmd : Cli.Cmd := `[Cli| + "refs-of" VIA runRefsOfCmd; + "Print a constant's direct constant references (comma-separated hex), the leaf list for a frontier assumption tree" + + FLAGS: + "ixe" : String; "Path to a serialized `.ixe` env (required)." + + ARGS: + name : String; "Fully-qualified Lean.Name (e.g. `Nat.add_comm`)." +] + +end diff --git a/Ix/IxVM/Kernel/Claim.lean b/Ix/IxVM/Kernel/Claim.lean index 7122431a..3a9d54d5 100644 --- a/Ix/IxVM/Kernel/Claim.lean +++ b/Ix/IxVM/Kernel/Claim.lean @@ -804,43 +804,22 @@ def claim := ⟦ } } - -- Pack the first 4 address bytes (LE) into a u32 key for the skip-set rbtree. - -- - -- Capped at 4 bytes because `RBTreeMap` orders keys with `u32_less_than`, a - -- 32-bit comparator gadget — a wider key (a single `G` could hold 7 bytes in - -- Goldilocks) would overflow it and corrupt the tree ordering. A 32-bit key - -- space means key collisions are possible (~N²/2^33 over N leaves), but they - -- are harmless: a collision makes `addr_in_set`'s confirming `address_eq` - -- fail, yielding a false negative (a frontier const gets rechecked) never a - -- false positive (a wrong skip). See `build_skip_set`. - fn addr_key(a: Addr) -> G { - let arr = load(a); - to_field(arr[0]) - + to_field(arr[1]) * 256 - + to_field(arr[2]) * 65536 - + to_field(arr[3]) * 16777216 - } - - -- Build an O(log N) membership set from the assumption-leaf list, keyed on - -- `addr_key`. Key collisions overwrite — sound because the only consequence is - -- a missed skip (a frontier const gets rechecked, never wrongly trusted); the - -- confirming `address_eq` in `addr_in_set` rules out false positives. - fn build_skip_set(leaves: List‹Addr›, acc: RBTreeMap‹Addr›) -> RBTreeMap‹Addr› { + -- Membership map over the assumption leaves, keyed by `ptr_val`. Sound + -- by the same invariant as `build_addr_pos_map` (Ingress.lean): one + -- pointer maps to one content, so a ptr hit implies the address IS a + -- leaf (no false skip); a de-interned pointer reads as absent, which + -- here only means the constant gets CHECKED instead of skipped — + -- conservative. One tree lookup per constant, no confirming + -- address_eq, no per-lookup address loads (ported from jcb/fixes + -- H-14; replaces the 4-byte-content-keyed set + confirm compare). + fn build_asm_leaf_map(leaves: List‹Addr›) -> RBTreeMap‹G› { match load(leaves) { - ListNode.Nil => acc, + ListNode.Nil => RBTreeMap.Nil, ListNode.Cons(a, rest) => - build_skip_set(rest, rbtree_map_insert(addr_key(a), a, acc)), + rbtree_map_insert(ptr_val(a), 1, build_asm_leaf_map(rest)), } } - -- Membership via one rbtree lookup (cheap u32 key compares) + one confirming - -- full `address_eq`, replacing the O(N) linear `addr_in_list` scan that - -- dominated address_eq cost on sharded checks. - fn addr_in_set(target: Addr, skip_set: RBTreeMap‹Addr›) -> G { - let found = rbtree_map_lookup_or_default(addr_key(target), skip_set, store([0u8; 32])); - address_eq(found, target) - } - -- ============================================================================ -- check_all variant that skips positions whose addr is in the -- supplied assumption-leaf list. @@ -850,27 +829,32 @@ def claim := ⟦ addrs: List‹Addr›, asm_leaves: List‹Addr›) { let _ = check_canonical_block_sort(top); - -- Build the skip-set once (O(N log N)) instead of an O(N) linear scan per - -- checked const. - let skip_set = build_skip_set(asm_leaves, RBTreeMap.Nil); - check_all_skipping_iter(consts, top, addrs, skip_set, 0) + let asm_map = store(build_asm_leaf_map(asm_leaves)); + check_all_skipping_iter(consts, top, addrs, addrs, asm_map, 0) } + -- `cur_addrs` is the suffix of `addrs` aligned with `consts`, walked in + -- lockstep instead of `list_lookup(addrs, pos)` — which re-walks the + -- list prefix every iteration, a standalone quadratic in closure size + -- (ported from jcb/fixes H-14). fn check_all_skipping_iter(consts: List‹&KConstantInfo›, top: List‹&KConstantInfo›, addrs: List‹Addr›, - skip_set: RBTreeMap‹Addr›, + cur_addrs: List‹Addr›, + asm_map: &RBTreeMap‹G›, pos: G) { match load(consts) { ListNode.Nil => (), ListNode.Cons(&ci, rest) => - let addr = list_lookup(addrs, pos); - match addr_in_set(addr, skip_set) { - 1 => - check_all_skipping_iter(rest, top, addrs, skip_set, pos + 1), - _ => - let _ = check_const(ci, pos, top, addrs); - check_all_skipping_iter(rest, top, addrs, skip_set, pos + 1), + match load(cur_addrs) { + ListNode.Cons(addr, rest_addrs) => + match rbtree_map_lookup_or_default(ptr_val(addr), load(asm_map), 0) { + 0 => + let _ = check_const(ci, pos, top, addrs); + check_all_skipping_iter(rest, top, addrs, rest_addrs, asm_map, pos + 1), + _ => + check_all_skipping_iter(rest, top, addrs, rest_addrs, asm_map, pos + 1), + }, }, } } diff --git a/Ix/IxVM/Kernel/Levels.lean b/Ix/IxVM/Kernel/Levels.lean index e9801669..f8bbbfad 100644 --- a/Ix/IxVM/Kernel/Levels.lean +++ b/Ix/IxVM/Kernel/Levels.lean @@ -57,142 +57,583 @@ def levels := ⟦ } } - fn level_has_param(l: KLevel) -> G { - match l { - KLevel.Zero => 0, - KLevel.Param(_) => 1, - KLevel.Succ(&a) => level_has_param(a), - KLevel.Max(&a, &b) => - let ha = level_has_param(a); - match ha { - 1 => 1, - 0 => level_has_param(b), + -- ============================================================================ + -- Canonical level normalization (mirror: src/ix/kernel/level.rs + -- normalize_level / norm_level_eq / norm_level_le, itself a port of + -- Lean4Lean's Level.Normalize with the documented subsumption and + -- covers-split fixes). + -- + -- Semantic level comparison via the recursive `Level.leq` with its + -- two-way param-substitution split per Max/IMax-with-param is + -- exponential in the number of params, and every branch materializes + -- freshly substituted levels (the Int16.instRxcHasSize_eq pathology: + -- 93% of the record was level_subst_reduce/level_leq rows). + -- Normalization is linear in the level size, `level_normalize` is + -- keyed on the level ALONE so each distinct level normalizes once per + -- run, and equality/leq on canonical forms is cheap. + -- + -- Canonical form: a path-sorted list of entries `(path, const, vars)` + -- where `path` is the sorted list of param indices conditioning an + -- imax chain (all assumed >= 1 along that branch), `const` the max + -- constant contribution, `vars` the idx-sorted `(param, offset)` + -- contributions. + -- ============================================================================ + enum NLVar { + Mk(G, G) + } + + enum NLEntry { + Mk(List‹G›, G, List‹&NLVar›) + } + + -- Lexicographic compare of sorted G-lists: 0 = eq, 1 = a < b, 2 = a > b. + fn glist_cmp(a: List‹G›, b: List‹G›) -> G { + match load(a) { + ListNode.Nil => + match load(b) { + ListNode.Nil => 0, + ListNode.Cons(_, _) => 1, }, - KLevel.IMax(&a, &b) => - let hb = level_has_param(b); - match hb { - 1 => 1, - 0 => level_has_param(a), + ListNode.Cons(x, ar) => + match load(b) { + ListNode.Nil => 2, + ListNode.Cons(y, br) => + match u32_less_than(x, y) { + 1 => 1, + 0 => + match u32_less_than(y, x) { + 1 => 2, + 0 => glist_cmp(ar, br), + }, + }, }, } } - fn level_any_param(l: KLevel) -> G { - match l { - KLevel.Param(i) => i, - KLevel.Succ(&a) => level_any_param(a), - KLevel.Max(&a, &b) => - let ha = level_has_param(a); - match ha { - 1 => level_any_param(a), - 0 => level_any_param(b), + -- Sorted-list subset test. + fn glist_subset(xs: List‹G›, ys: List‹G›) -> G { + match load(xs) { + ListNode.Nil => 1, + ListNode.Cons(x, xr) => + match load(ys) { + ListNode.Nil => 0, + ListNode.Cons(y, yr) => + match u32_less_than(x, y) { + 1 => 0, + 0 => + match u32_less_than(y, x) { + 1 => glist_subset(xs, yr), + 0 => glist_subset(xr, yr), + }, + }, }, - KLevel.IMax(&a, &b) => - let hb = level_has_param(b); - match hb { - 1 => level_any_param(b), - 0 => level_any_param(a), + } + } + + fn glist_eq_len(a: List‹G›, b: List‹G›) -> G { + match load(a) { + ListNode.Nil => + match load(b) { + ListNode.Nil => 1, + ListNode.Cons(_, _) => 0, + }, + ListNode.Cons(_, ar) => + match load(b) { + ListNode.Nil => 0, + ListNode.Cons(_, br) => glist_eq_len(ar, br), }, - KLevel.Zero => 0, } } - fn level_subst_reduce(l: KLevel, p: G, repl: KLevel) -> KLevel { - match l { - KLevel.Zero => KLevel.Zero, - KLevel.Param(i) => - match i - p { - 0 => repl, - _ => KLevel.Param(i), + -- Insert into a sorted G-list. Returns (1, new_list) on insert, + -- (0, original) when already present. Mirrors level.rs ordered_insert. + fn glist_ordered_insert(x: G, l: List‹G›) -> (G, List‹G›) { + match load(l) { + ListNode.Nil => (1, store(ListNode.Cons(x, store(ListNode.Nil)))), + ListNode.Cons(h, r) => + match u32_less_than(x, h) { + 1 => (1, store(ListNode.Cons(x, l))), + 0 => + match u32_less_than(h, x) { + 0 => (0, l), + 1 => + match glist_ordered_insert(x, r) { + (f, r2) => (f, store(ListNode.Cons(h, r2))), + }, + }, }, - KLevel.Succ(&a) => - KLevel.Succ(store(level_subst_reduce(a, p, repl))), - KLevel.Max(&a, &b) => - level_max(level_subst_reduce(a, p, repl), level_subst_reduce(b, p, repl)), - KLevel.IMax(&a, &b) => - level_imax(level_subst_reduce(a, p, repl), level_subst_reduce(b, p, repl)), } } - fn level_leq(a: KLevel, b: KLevel) -> G { - match a { - KLevel.Zero => 1, - KLevel.Max(&a1, &a2) => - level_leq(a1, b) * level_leq(a2, b), - KLevel.Succ(&a1) => - match a1 { - KLevel.Max(&x, &y) => - level_leq(KLevel.Succ(store(x)), b) * level_leq(KLevel.Succ(store(y)), b), - _ => - match b { - KLevel.Succ(&b1) => level_leq(a1, b1), - KLevel.Max(&b1, &b2) => - let r1 = level_leq(a, b1); - match r1 { - 1 => 1, + -- Insert (idx, k) into an idx-sorted var list; on duplicate idx keep + -- the max offset. Mirrors Node::add_var. + fn nlvars_add(vars: List‹&NLVar›, idx: G, k: G) -> List‹&NLVar› { + match load(vars) { + ListNode.Nil => + store(ListNode.Cons(store(NLVar.Mk(idx, k)), store(ListNode.Nil))), + ListNode.Cons(v, rest) => + match load(v) { + NLVar.Mk(vi, vo) => + match u32_less_than(idx, vi) { + 1 => store(ListNode.Cons(store(NLVar.Mk(idx, k)), vars)), + 0 => + match u32_less_than(vi, idx) { + 1 => store(ListNode.Cons(v, nlvars_add(rest, idx, k))), 0 => - let r2 = level_leq(a, b2); - match r2 { - 1 => 1, + match u32_less_than(vo, k) { + 1 => store(ListNode.Cons(store(NLVar.Mk(vi, k)), rest)), + 0 => vars, + }, + }, + }, + }, + } + } + + fn nlvars_eq(a: List‹&NLVar›, b: List‹&NLVar›) -> G { + match load(a) { + ListNode.Nil => + match load(b) { + ListNode.Nil => 1, + ListNode.Cons(_, _) => 0, + }, + ListNode.Cons(x, ar) => + match load(b) { + ListNode.Nil => 0, + ListNode.Cons(y, br) => + match load(x) { + NLVar.Mk(xi, xo) => + match load(y) { + NLVar.Mk(yi, yo) => + match xi - yi { + 0 => + match xo - yo { + 0 => nlvars_eq(ar, br), + _ => 0, + }, + _ => 0, + }, + }, + }, + }, + } + } + + fn nlvars_max_offset(vars: List‹&NLVar›) -> G { + match load(vars) { + ListNode.Nil => 0, + ListNode.Cons(v, rest) => + match load(v) { + NLVar.Mk(_, vo) => + let m = nlvars_max_offset(rest); + match u32_less_than(m, vo) { + 1 => vo, + 0 => m, + }, + }, + } + } + + -- Keep x in xs unless ys has the same idx with offset >= x's. + -- Mirrors level.rs subsume_vars (sorted merge walk). + fn nlvars_subsume(xs: List‹&NLVar›, ys: List‹&NLVar›) -> List‹&NLVar› { + match load(xs) { + ListNode.Nil => xs, + ListNode.Cons(x, xr) => + match load(ys) { + ListNode.Nil => xs, + ListNode.Cons(y, yr) => + match load(x) { + NLVar.Mk(xi, xo) => + match load(y) { + NLVar.Mk(yi, yo) => + match u32_less_than(xi, yi) { + 1 => store(ListNode.Cons(x, nlvars_subsume(xr, ys))), 0 => - let bfull = KLevel.Max(store(b1), store(b2)); - let hp = level_has_param(bfull); - match hp { + match u32_less_than(yi, xi) { + 1 => nlvars_subsume(xs, yr), + 0 => + match u32_less_than(yo, xo) { + 1 => store(ListNode.Cons(x, nlvars_subsume(xr, yr))), + 0 => nlvars_subsume(xr, yr), + }, + }, + }, + }, + }, + }, + } + } + + -- Max the constant contribution into the entry at `path` (path-sorted + -- insert). Mirrors norm_add_const incl. its skip rule: k = 0 never + -- contributes; k = 1 only at the empty path (along a non-empty path + -- all conditioning params are >= 1 so the branch value is >= 1 anyway). + fn nl_add_const(acc: List‹&NLEntry›, path: List‹G›, k: G) -> List‹&NLEntry› { + match k { + 0 => acc, + 1 => + match load(path) { + ListNode.Nil => nl_add_const_go(acc, path, k), + ListNode.Cons(_, _) => acc, + }, + _ => nl_add_const_go(acc, path, k), + } + } + + fn nl_add_const_go(acc: List‹&NLEntry›, path: List‹G›, k: G) -> List‹&NLEntry› { + match load(acc) { + ListNode.Nil => + store(ListNode.Cons( + store(NLEntry.Mk(path, k, store(ListNode.Nil))), + store(ListNode.Nil))), + ListNode.Cons(e, rest) => + match load(e) { + NLEntry.Mk(ep, ec, ev) => + match glist_cmp(path, ep) { + 0 => + match u32_less_than(ec, k) { + 1 => store(ListNode.Cons(store(NLEntry.Mk(ep, k, ev)), rest)), + 0 => acc, + }, + 1 => + store(ListNode.Cons( + store(NLEntry.Mk(path, k, store(ListNode.Nil))), acc)), + _ => store(ListNode.Cons(e, nl_add_const_go(rest, path, k))), + }, + }, + } + } + + -- Add var (idx, k) to the entry at `path` (path-sorted insert). + fn nl_add_var(acc: List‹&NLEntry›, path: List‹G›, idx: G, k: G) -> List‹&NLEntry› { + match load(acc) { + ListNode.Nil => + store(ListNode.Cons( + store(NLEntry.Mk(path, 0, + store(ListNode.Cons(store(NLVar.Mk(idx, k)), store(ListNode.Nil))))), + store(ListNode.Nil))), + ListNode.Cons(e, rest) => + match load(e) { + NLEntry.Mk(ep, ec, ev) => + match glist_cmp(path, ep) { + 0 => + store(ListNode.Cons( + store(NLEntry.Mk(ep, ec, nlvars_add(ev, idx, k))), rest)), + 1 => + store(ListNode.Cons( + store(NLEntry.Mk(path, 0, + store(ListNode.Cons(store(NLVar.Mk(idx, k)), store(ListNode.Nil))))), + acc)), + _ => store(ListNode.Cons(e, nl_add_var(rest, path, idx, k))), + }, + }, + } + } + + -- Flatten a level into canonical form. `path` = imax conditioning chain + -- (sorted param idxs), `k` = accumulated Succ offset. Mirrors + -- normalize_aux; the IMax arm delegates to the dispatch (whose cases + -- replicate the aux IMax-shape cases verbatim, as in level.rs). + fn normalize_aux(l: KLevel, path: List‹G›, k: G, + acc: List‹&NLEntry›) -> List‹&NLEntry› { + match l { + KLevel.Zero => nl_add_const(acc, path, k), + KLevel.Succ(&inner) => normalize_aux(inner, path, k + 1, acc), + KLevel.Max(&a, &b) => + normalize_aux(b, path, k, normalize_aux(a, path, k, acc)), + KLevel.IMax(&u, &b) => normalize_imax_dispatch(u, b, path, k, acc), + KLevel.Param(idx) => + match glist_ordered_insert(idx, path) { + (1, new_path) => + nl_add_var(nl_add_const(acc, path, k), new_path, idx, k), + (0, _) => + match k { + 0 => acc, + _ => nl_add_var(acc, path, idx, k), + }, + }, + } + } + + -- imax(a, b) by b's shape: zero kills the branch; succ is never-zero so + -- imax = max; max/imax distribute; param conditions the path. + fn normalize_imax_dispatch(a: KLevel, b: KLevel, path: List‹G›, k: G, + acc: List‹&NLEntry›) -> List‹&NLEntry› { + match b { + KLevel.Zero => nl_add_const(acc, path, k), + KLevel.Succ(&v) => + normalize_aux(v, path, k + 1, normalize_aux(a, path, k, acc)), + KLevel.Max(&v, &w) => + -- imax(a, max(v, w)) = max(imax(a, v), imax(a, w)) + normalize_imax_dispatch(a, w, path, k, + normalize_imax_dispatch(a, v, path, k, acc)), + KLevel.IMax(&v, &w) => + -- imax(a, imax(v, w)) = max(imax(a, w), imax(v, w)) + normalize_imax_dispatch(v, w, path, k, + normalize_imax_dispatch(a, w, path, k, acc)), + KLevel.Param(idx) => + match glist_ordered_insert(idx, path) { + (1, new_path) => + -- param(idx) = 0 branch: imax(a, 0) = 0, contributing k. + normalize_aux(a, new_path, k, + nl_add_var(nl_add_const(acc, path, k), new_path, idx, k)), + (0, _) => + let acc2 = match k { + 0 => acc, + _ => nl_add_var(acc, path, idx, k), + }; + normalize_aux(a, path, k, acc2), + }, + } + } + + -- Phase 2 subsumption: drop contributions dominated by another entry + -- whose path is a subset (active whenever the dominated one is). + -- Each entry folds over the ORIGINAL (snapshot) list, mirroring the + -- Rust snapshot semantics. + fn nl_subsumption(acc: List‹&NLEntry›) -> List‹&NLEntry› { + nl_subsumption_walk(acc, acc) + } + + fn nl_subsumption_walk(rem: List‹&NLEntry›, + snapshot: List‹&NLEntry›) -> List‹&NLEntry› { + match load(rem) { + ListNode.Nil => rem, + ListNode.Cons(e, rest) => + store(ListNode.Cons( + nl_subsume_entry(e, snapshot), + nl_subsumption_walk(rest, snapshot))), + } + } + + fn nl_subsume_entry(e: &NLEntry, snap: List‹&NLEntry›) -> &NLEntry { + match load(snap) { + ListNode.Nil => e, + ListNode.Cons(s, srest) => + match load(e) { + NLEntry.Mk(p1, c1, v1) => + match load(s) { + NLEntry.Mk(p2, c2, v2) => + match glist_subset(p2, p1) { + 0 => nl_subsume_entry(e, srest), + _ => + -- subset + equal length <=> p1 == p2 (self entry) + let same = glist_eq_len(p1, p2); + let c1n = match c1 { + 0 => 0, + _ => + let or1 = match same { + 1 => 1, + 0 => u32_less_than(c2, c1), + }; + let or2 = match load(v2) { + ListNode.Nil => 1, + ListNode.Cons(_, _) => + u32_less_than(nlvars_max_offset(v1) + 1, c1), + }; + match or1 * or2 { 0 => 0, - _ => - let p = level_any_param(bfull); - let sp = KLevel.Succ(store(KLevel.Param(p))); - let a0 = level_subst_reduce(a, p, KLevel.Zero); - let b0 = level_subst_reduce(bfull, p, KLevel.Zero); - let a1s = level_subst_reduce(a, p, sp); - let b1s = level_subst_reduce(bfull, p, sp); - level_leq(a0, b0) * level_leq(a1s, b1s), + _ => c1, }, + }; + let v1n = match same { + 1 => v1, + 0 => + match load(v2) { + ListNode.Nil => v1, + ListNode.Cons(_, _) => nlvars_subsume(v1, v2), + }, + }; + nl_subsume_entry(store(NLEntry.Mk(p1, c1n, v1n)), srest), + }, + }, + }, + } + } + + fn nl_eq(a: List‹&NLEntry›, b: List‹&NLEntry›) -> G { + match load(a) { + ListNode.Nil => + match load(b) { + ListNode.Nil => 1, + ListNode.Cons(_, _) => 0, + }, + ListNode.Cons(x, ar) => + match load(b) { + ListNode.Nil => 0, + ListNode.Cons(y, br) => + match load(x) { + NLEntry.Mk(xp, xc, xv) => + match load(y) { + NLEntry.Mk(yp, yc, yv) => + match glist_cmp(xp, yp) { + 0 => + match xc - yc { + 0 => + match nlvars_eq(xv, yv) { + 1 => nl_eq(ar, br), + 0 => 0, + }, + _ => 0, + }, + _ => 0, }, }, - _ => 0, }, }, - KLevel.Param(i) => - match b { - KLevel.Param(j) => eq_zero(i - j), - KLevel.Succ(&b1) => level_leq(a, b1), - KLevel.Max(&b1, &b2) => - let r1 = level_leq(a, b1); - match r1 { - 1 => 1, - 0 => level_leq(a, b2), + } + } + + -- Does some entry (p2, n2) in l2 with p2 SUBSET-OF p1 dominate the + -- constant c along every assignment activating p1? n2.const counts + -- unconditionally in that branch; each var (idx, off) counts as + -- >= off + 1 because idx IN p2 implies the param is >= 1 there. + -- Mirrors level.rs covers_const (the fixed, split-search version). + fn nl_covers_const(l2: List‹&NLEntry›, p1: List‹G›, c: G) -> G { + match load(l2) { + ListNode.Nil => 0, + ListNode.Cons(e, rest) => + match load(e) { + NLEntry.Mk(p2, c2, v2) => + match glist_subset(p2, p1) { + 0 => nl_covers_const(rest, p1, c), + _ => + let hit = match u32_less_than(c2, c) { + 0 => 1, + 1 => nlvars_any_offset_geq(v2, c), + }; + match hit { + 1 => 1, + 0 => nl_covers_const(rest, p1, c), + }, }, - KLevel.IMax(&b1, &b2) => - let p = level_any_param(b2); - let sp = KLevel.Succ(store(KLevel.Param(p))); - let a0 = level_subst_reduce(a, p, KLevel.Zero); - let bfull = KLevel.IMax(store(b1), store(b2)); - let b0 = level_subst_reduce(bfull, p, KLevel.Zero); - let a1s = level_subst_reduce(a, p, sp); - let b1s = level_subst_reduce(bfull, p, sp); - level_leq(a0, b0) * level_leq(a1s, b1s), - KLevel.Zero => 0, }, - KLevel.IMax(&a1, &a2) => - let not_zero = level_is_not_zero(a2); - match not_zero { - 1 => level_leq(a1, b) * level_leq(a2, b), - 0 => - let p = level_any_param(a2); - let sp = KLevel.Succ(store(KLevel.Param(p))); - let afull = KLevel.IMax(store(a1), store(a2)); - let a0 = level_subst_reduce(afull, p, KLevel.Zero); - let b0 = level_subst_reduce(b, p, KLevel.Zero); - let a1s = level_subst_reduce(afull, p, sp); - let b1s = level_subst_reduce(b, p, sp); - level_leq(a0, b0) * level_leq(a1s, b1s), + } + } + + -- Any var with offset + 1 >= c? + fn nlvars_any_offset_geq(vars: List‹&NLVar›, c: G) -> G { + match load(vars) { + ListNode.Nil => 0, + ListNode.Cons(v, rest) => + match load(v) { + NLVar.Mk(_, vo) => + match u32_less_than(vo + 1, c) { + 0 => 1, + 1 => nlvars_any_offset_geq(rest, c), + }, + }, + } + } + + -- Does some entry (p2, n2) in l2 with p2 SUBSET-OF p1 contain a var + -- (w, off2) with off2 >= off? Mirrors level.rs covers_var. + fn nl_covers_var(l2: List‹&NLEntry›, p1: List‹G›, w: G, off: G) -> G { + match load(l2) { + ListNode.Nil => 0, + ListNode.Cons(e, rest) => + match load(e) { + NLEntry.Mk(p2, _, v2) => + match glist_subset(p2, p1) { + 0 => nl_covers_var(rest, p1, w, off), + _ => + match nlvars_dominates(v2, w, off) { + 1 => 1, + 0 => nl_covers_var(rest, p1, w, off), + }, + }, + }, + } + } + + fn nlvars_dominates(vars: List‹&NLVar›, w: G, off: G) -> G { + match load(vars) { + ListNode.Nil => 0, + ListNode.Cons(v, rest) => + match load(v) { + NLVar.Mk(vi, vo) => + match vi - w { + 0 => + match u32_less_than(vo, off) { + 0 => 1, + 1 => nlvars_dominates(rest, w, off), + }, + _ => nlvars_dominates(rest, w, off), + }, }, } } + -- Semantic l1 <= l2 on canonical forms: every entry's contribution in + -- its activation branch must be dominated by subset-path entries of l2. + -- Mirrors level.rs norm_level_le (with its covers split, sound where + -- Lean4Lean's single-entry search is incomplete). + fn nl_le(l1: List‹&NLEntry›, l2: List‹&NLEntry›) -> G { + match load(l1) { + ListNode.Nil => 1, + ListNode.Cons(e, rest) => + match load(e) { + NLEntry.Mk(p1, c1, v1) => + let c_ok = match c1 { + 0 => 1, + _ => nl_covers_const(l2, p1, c1), + }; + match c_ok { + 0 => 0, + 1 => + match nl_le_vars(v1, l2, p1) { + 0 => 0, + 1 => nl_le(rest, l2), + }, + }, + }, + } + } + + fn nl_le_vars(vars: List‹&NLVar›, l2: List‹&NLEntry›, p1: List‹G›) -> G { + match load(vars) { + ListNode.Nil => 1, + ListNode.Cons(v, rest) => + match load(v) { + NLVar.Mk(vi, vo) => + match nl_covers_var(l2, p1, vi, vo) { + 0 => 0, + 1 => nl_le_vars(rest, l2, p1), + }, + }, + } + } + + -- Keyed on the level alone: each distinct level normalizes once per run. + -- Seeded with the empty-path entry, mirroring normalize_level. + fn level_normalize(l: KLevel) -> List‹&NLEntry› { + let seed = store(ListNode.Cons( + store(NLEntry.Mk(store(ListNode.Nil), 0, store(ListNode.Nil))), + store(ListNode.Nil))); + nl_subsumption(normalize_aux(l, store(ListNode.Nil), 0, seed)) + } + + fn level_leq(a: KLevel, b: KLevel) -> G { + match level_eq(a, b) { + 1 => 1, + 0 => + match a { + KLevel.Zero => 1, + _ => nl_le(level_normalize(a), level_normalize(b)), + }, + } + } + + -- Semantic level equality via canonical forms (mirror: level.rs + -- univ_eq). The previous leq(a,b)*leq(b,a) did a two-way + -- param-substitution split per Max/IMax-with-param -- exponential in + -- params and the dominant record cost on instance-heavy checks. fn level_equal(a: KLevel, b: KLevel) -> G { - level_leq(a, b) * level_leq(b, a) + match level_eq(a, b) { + 1 => 1, + 0 => nl_eq(level_normalize(a), level_normalize(b)), + } } fn level_max(a: KLevel, b: KLevel) -> KLevel { diff --git a/Ix/IxVM/Kernel/Primitive.lean b/Ix/IxVM/Kernel/Primitive.lean index 9300ea1c..31e57dd2 100644 --- a/Ix/IxVM/Kernel/Primitive.lean +++ b/Ix/IxVM/Kernel/Primitive.lean @@ -1228,24 +1228,18 @@ def primitive := ⟦ } } - -- Convert a KLimbs n into a chain `App(Const(succ), App(Const(succ), - -- ... Const(zero)))` for n calls of succ. Used by nat-literal-to-ctor - -- coercion in iota. - fn klimbs_to_ctor_form(n: KLimbs, zero_idx: G, succ_idx: G) -> KExpr { - match load(n) { - ListNode.Nil => - store(KExprNode.Const(zero_idx, store(ListNode.Nil))), - ListNode.Cons(_, _) => - let dec = klimbs_dec(n); - let pred = klimbs_to_ctor_form(dec, zero_idx, succ_idx); - let succ_const = store(KExprNode.Const(succ_idx, store(ListNode.Nil))); - store(KExprNode.App(succ_const, pred)), - } - } - -- If `e` is `Lit(Nat(klimbs))` and addrs contains both Nat.zero and - -- Nat.succ, expand to ctor chain. Else return `e` unchanged. Mirror: - -- src/ix/kernel/whnf.rs:929-946 nat_to_constructor. + -- Nat.succ, expose ONE constructor layer: `0 -> Const(zero)`, + -- `n -> App(Const(succ), Lit(n-1))`. Else return `e` unchanged. Mirror: + -- src/ix/kernel/whnf.rs nat_to_constructor (one level, matching lean4 + -- inductive.h:91-93 and lean4lean Reduce.lean:70). + -- + -- One level, NOT the full succ chain: iota only needs the outermost + -- ctor to fire, and the predecessor stays a compact literal that + -- downstream whnf/def_eq/extract hit via the Nat fast paths. A full + -- `succ^n(zero)` chain materialized n fresh nodes per expansion event + -- and made every traversal of the major O(n) with distinct memo keys + -- per depth. fn nat_lit_to_ctor_or_self(e: KExpr, addrs: List‹Addr›) -> KExpr { match load(e) { KExprNode.Lit(lit) => @@ -1256,7 +1250,15 @@ def primitive := ⟦ match z { (1, zero_idx) => match s { - (1, succ_idx) => klimbs_to_ctor_form(klimbs, zero_idx, succ_idx), + (1, succ_idx) => + match load(klimbs) { + ListNode.Nil => + store(KExprNode.Const(zero_idx, store(ListNode.Nil))), + ListNode.Cons(_, _) => + let pred = store(KExprNode.Lit(KLiteral.Nat(klimbs_dec(klimbs)))); + let succ_const = store(KExprNode.Const(succ_idx, store(ListNode.Nil))); + store(KExprNode.App(succ_const, pred)), + }, _ => e, }, _ => e, @@ -1307,59 +1309,232 @@ def primitive := ⟦ store(KExprNode.Lit(KLiteral.Nat(n))) } - -- 1 iff `head_addr` is one of the head-dispatched primitive ops checked by - -- `try_nat_dispatch` / `try_str_dispatch` / `try_bitvec_dispatch` / - -- `try_reduce_native` / `try_reduce_decidable`. These addresses are mutually - -- exclusive (a const has one content address), so the sum is 0 or 1. - -- Memoized on `head_addr` (a content pointer): computed once per distinct - -- const, vs the per-whnf gauntlet of 5 `try_*` calls. Lets `whnf_const_head` - -- skip the gauntlet for the common non-primitive head. MUST stay in sync - -- with the `address_eq(head_addr, …)` checks in those functions — a - -- differential `assert` guards that during development. - fn prim_any_addr(head_addr: Addr) -> G { - address_eq(head_addr, nat_add_addr()) - + address_eq(head_addr, nat_sub_addr()) - + address_eq(head_addr, nat_mul_addr()) - + address_eq(head_addr, nat_div_addr()) - + address_eq(head_addr, nat_mod_addr()) - + address_eq(head_addr, nat_pow_addr()) - + address_eq(head_addr, nat_gcd_addr()) - + address_eq(head_addr, nat_land_addr()) - + address_eq(head_addr, nat_lor_addr()) - + address_eq(head_addr, nat_xor_addr()) - + address_eq(head_addr, nat_shift_left_addr()) - + address_eq(head_addr, nat_shift_right_addr()) - + address_eq(head_addr, nat_succ_addr()) - + address_eq(head_addr, nat_pred_addr()) - + address_eq(head_addr, nat_beq_addr()) - + address_eq(head_addr, nat_ble_addr()) - + address_eq(head_addr, nat_dec_eq_addr()) - + address_eq(head_addr, nat_dec_le_addr()) - + address_eq(head_addr, nat_dec_lt_addr()) - + address_eq(head_addr, bool_true_addr()) - + address_eq(head_addr, bool_false_addr()) - + address_eq(head_addr, int_of_nat_addr()) - + address_eq(head_addr, int_neg_succ_addr()) - + address_eq(head_addr, int_dec_eq_addr()) - + address_eq(head_addr, int_dec_le_addr()) - + address_eq(head_addr, int_dec_lt_addr()) - + address_eq(head_addr, bit_vec_of_nat_addr()) - + address_eq(head_addr, bit_vec_to_nat_addr()) - + address_eq(head_addr, bit_vec_ult_addr()) - + address_eq(head_addr, decidable_decide_addr()) - + address_eq(head_addr, reduce_bool_addr()) - + address_eq(head_addr, reduce_nat_addr()) - + address_eq(head_addr, size_of_size_of_addr()) - + address_eq(head_addr, string_back_addr()) - + address_eq(head_addr, string_legacy_back_addr()) - + address_eq(head_addr, string_to_byte_array_addr()) - + address_eq(head_addr, subtype_val_addr()) - + address_eq(head_addr, system_platform_get_num_bits_addr()) - + address_eq(head_addr, system_platform_num_bits_addr()) - + address_eq(head_addr, punit_addr()) - + address_eq(head_addr, punit_size_of_1_addr()) - + address_eq(head_addr, unit_addr()) - + address_eq(head_addr, string_utf8_byte_size_addr()) + -- Build the compact stuck offset `Nat.add base (Lit n)` applied to `post`, + -- the canonical normal form for a symbolic-base Nat offset (paired with + -- offset-aware def-eq). n = 0 collapses to `base` directly. Shared by the + -- nat dispatch (symbolic add/div/mod kept stuck) and the linear-rec + -- collapse, keeping the offset construction out of those circuits' width. + fn mk_nat_offset_stuck(base_w: KExpr, n: KLimbs, post: List‹KExpr›, + addrs: List‹Addr›) -> (G, KExpr) { + match klimbs_is_zero(n) { + 1 => (1, apply_spine(base_w, post)), + 0 => + match find_addr_idx_safe(nat_add_addr(), addrs, 0) { + (0, _) => (0, store(KExprNode.BVar(0))), + (1, add_idx) => + let add_const = store(KExprNode.Const(add_idx, store(ListNode.Nil))); + let off = store(KExprNode.App( + store(KExprNode.App(add_const, base_w)), + mk_nat_lit(n))); + (1, apply_spine(off, post)), + }, + } + } + + -- ============================================================================ + -- Primitive-family classification + -- + -- `prim_family(a)` maps a Const-head address to the one reducer family + -- that could possibly fire on it: 0 = none, 1 = nat (`try_nat_dispatch`), + -- 2 = str, 3 = bitvec, 4 = native, 5 = decidable. The family address + -- sets are mutually disjoint (verified against each reducer's + -- comparisons), so classification is unambiguous. + -- + -- Keyed on the ADDRESS ALONE: one memo row per distinct constant + -- address in the whole run (~hundreds), and the inner `address_eq` + -- chains are likewise keyed `(a, prim)` so they collapse after the + -- first occurrence of each address. This replaces running every + -- reducer as a gauntlet from `whnf_const_head`, whose try_* keys + -- include the spine and context — 4-6 wide rows per Const-head whnf, + -- almost all guaranteed misses. + -- ============================================================================ + fn prim_family(a: Addr) -> G { + match is_nat_prim_addr(a) { + 1 => 1, + 0 => + match is_str_prim_addr(a) { + 1 => 2, + 0 => + match is_bitvec_prim_addr(a) { + 1 => 3, + 0 => + match is_native_prim_addr(a) { + 1 => 4, + 0 => + match is_decidable_prim_addr(a) { + 1 => 5, + 0 => 0, + }, + }, + }, + }, + } + } + + -- Addresses handled by `try_nat_dispatch`: succ/pred plus the binop + -- set of `try_nat_binop_addr`. + fn is_nat_prim_addr(a: Addr) -> G { + match address_eq(a, nat_succ_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_pred_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_add_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_sub_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_mul_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_div_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_mod_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_pow_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_gcd_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_beq_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_ble_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_land_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_lor_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_xor_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_shift_left_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_shift_right_addr()) { + 1 => 1, + 0 => + 0, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + } + + -- Addresses handled by `try_str_dispatch`. + fn is_str_prim_addr(a: Addr) -> G { + match address_eq(a, string_utf8_byte_size_addr()) { + 1 => 1, + 0 => + match address_eq(a, string_to_byte_array_addr()) { + 1 => 1, + 0 => + match address_eq(a, string_back_addr()) { + 1 => 1, + 0 => + match address_eq(a, string_legacy_back_addr()) { + 1 => 1, + 0 => + 0, + }, + }, + }, + } + } + + -- Addresses handled by `try_bitvec_dispatch`. + fn is_bitvec_prim_addr(a: Addr) -> G { + match address_eq(a, bit_vec_to_nat_addr()) { + 1 => 1, + 0 => + match address_eq(a, bit_vec_ult_addr()) { + 1 => 1, + 0 => + match address_eq(a, decidable_decide_addr()) { + 1 => 1, + 0 => + 0, + }, + }, + } + } + + -- Addresses handled by `try_reduce_native`. + fn is_native_prim_addr(a: Addr) -> G { + match address_eq(a, reduce_bool_addr()) { + 1 => 1, + 0 => + match address_eq(a, reduce_nat_addr()) { + 1 => 1, + 0 => + match address_eq(a, subtype_val_addr()) { + 1 => 1, + 0 => + match address_eq(a, size_of_size_of_addr()) { + 1 => 1, + 0 => + match address_eq(a, punit_size_of_1_addr()) { + 1 => 1, + 0 => + match address_eq(a, system_platform_num_bits_addr()) { + 1 => 1, + 0 => + 0, + }, + }, + }, + }, + }, + } + } + + -- Addresses handled by `try_reduce_decidable`. + fn is_decidable_prim_addr(a: Addr) -> G { + match address_eq(a, nat_dec_eq_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_dec_lt_addr()) { + 1 => 1, + 0 => + match address_eq(a, nat_dec_le_addr()) { + 1 => 1, + 0 => + match address_eq(a, int_dec_eq_addr()) { + 1 => 1, + 0 => + match address_eq(a, int_dec_lt_addr()) { + 1 => 1, + 0 => + match address_eq(a, int_dec_le_addr()) { + 1 => 1, + 0 => + 0, + }, + }, + }, + }, + }, + } } -- Mirror: src/ix/kernel/whnf.rs:500-700 Nat-on-literals dispatch. @@ -1422,13 +1597,46 @@ def primitive := ⟦ }, _ => (0, store(KExprNode.BVar(0))), }, - _ => (0, store(KExprNode.BVar(0))), + _ => + -- Symbolic base: route to the cold offset-stuck check + -- (separate circuit so its width only taxes actual + -- symbolic-base events, not every literal binop row). + match pb { + (1, nb) => + try_nat_offset_dispatch(head_addr, a0_w, nb, spine, addrs), + _ => (0, store(KExprNode.BVar(0))), + }, }, }, }, } } + -- Cold path of try_nat_dispatch: a binary Nat op whose whnf'd base is + -- symbolic and whose second arg is `Lit nb`. For `Nat.add` (any nb) and + -- `Nat.div`/`Nat.mod` (nb >= 2) the term is irreducible: return verdict + -- 2 = "already stuck in compact offset form" so whnf keeps it instead of + -- delta-unfolding into a succ^nb tower / the division algorithm + -- (`x >>> k` = k nested `Nat.div _ 2`). Pairs with offset-aware def-eq. + fn try_nat_offset_dispatch(head_addr: Addr, a0_w: KExpr, nb: KLimbs, + spine: List‹KExpr›, addrs: List‹Addr›) -> (G, KExpr) { + let is_add = address_eq(head_addr, nat_add_addr()); + let is_divmod = address_eq(head_addr, nat_div_addr()) + + address_eq(head_addr, nat_mod_addr()); + let eligible = + is_add + is_divmod * (1 - klimbs_is_zero(nb)) + * (1 - klimbs_is_zero(klimbs_dec(nb))); + match eligible { + 0 => (0, store(KExprNode.BVar(0))), + _ => + let post = list_drop(spine, 2); + match mk_nat_offset_stuck(a0_w, nb, post, addrs) { + (1, stuck) => (2, stuck), + (0, _) => (0, store(KExprNode.BVar(0))), + }, + } + } + -- Dispatch a Nat binary op by head address. Bool result for beq/ble -- wraps via Bool.true / Bool.false ctors (mk_bool). fn try_nat_binop_addr(head_addr: Addr, a: KLimbs, b: KLimbs, @@ -2418,26 +2626,57 @@ def primitive := ⟦ -- Recognizes Defn-kind constants whose body is `λ x_1 ... x_n → Prj S i (BVar k)` -- and shortcuts the unfolding to a direct `Prj S i args[arity-1-k]`. Pure -- performance: standard delta+beta still produces same result. - fn try_reduce_projection_definition(head_idx: G, spine: List‹KExpr›, - top: List‹&KConstantInfo›) -> (G, KExpr) { + -- Whether the constant at `head_idx` is a projection-definition, and + -- its (arity, struct_idx, field, struct_arg_idx) when so. Keyed on the + -- INDEX alone (top is constant per run): one memo row per distinct + -- constant, where the previous spine-keyed probe paid a row per delta + -- candidate — 12.7M entries on the UTF-8 codec check, scaling 39x per + -- SInt width doubling (structure-bundle instances force projections + -- constantly). + fn proj_def_info_of(head_idx: G, + top: List‹&KConstantInfo›) -> (G, G, G, G, G) { match load(list_lookup(top, head_idx)) { KConstantInfo.Defn(_, _, value, _, _) => - match projection_definition_info(value, 0) { - (1, arity, struct_idx, field, struct_arg_idx) => - match u32_less_than(list_length(spine), arity) { - 1 => (0, store(KExprNode.BVar(0))), - 0 => - let target_arg = list_lookup(spine, struct_arg_idx); - let proj_expr = store(KExprNode.Proj(struct_idx, field, target_arg)); - let post = list_drop(spine, arity); - (1, apply_spine(proj_expr, post)), - }, - _ => (0, store(KExprNode.BVar(0))), - }, + projection_definition_info(value, 0), + _ => (0, 0, 0, 0, 0), + } + } + + -- Scalar gate for inline use (let-bound matches only support scalar + -- scrutinees): 1 iff the constant is a projection-definition. + fn is_proj_def(head_idx: G, top: List‹&KConstantInfo›) -> G { + match proj_def_info_of(head_idx, top) { + (1, _, _, _, _) => 1, + _ => 0, + } + } + + -- The spine-keyed application, called ONLY when `is_proj_def` already + -- said yes (rare), so the per-delta-candidate row of the old + -- ungated form is gone. + fn try_reduce_projection_definition(head_idx: G, spine: List‹KExpr›, + top: List‹&KConstantInfo›) -> (G, KExpr) { + match proj_def_info_of(head_idx, top) { + (1, arity, struct_idx, field, struct_arg_idx) => + proj_def_apply(arity, struct_idx, field, struct_arg_idx, spine), _ => (0, store(KExprNode.BVar(0))), } } + -- The spine-dependent application, reached only for ACTUAL + -- projection-definitions (rare). + fn proj_def_apply(arity: G, struct_idx: G, field: G, struct_arg_idx: G, + spine: List‹KExpr›) -> (G, KExpr) { + match u32_less_than(list_length(spine), arity) { + 1 => (0, store(KExprNode.BVar(0))), + 0 => + let target_arg = list_lookup(spine, struct_arg_idx); + let proj_expr = store(KExprNode.Proj(struct_idx, field, target_arg)); + let post = list_drop(spine, arity); + (1, apply_spine(proj_expr, post)), + } + } + -- Mirror: src/ix/kernel/whnf.rs:1441-1500 try_reduce_fin_val_decidable_rec -- + project_decidable_fin_val_minor. -- diff --git a/Ix/IxVM/Kernel/Subst.lean b/Ix/IxVM/Kernel/Subst.lean index e37d1e0d..9d8a7ce6 100644 --- a/Ix/IxVM/Kernel/Subst.lean +++ b/Ix/IxVM/Kernel/Subst.lean @@ -202,6 +202,14 @@ def subst := ⟦ } } + -- Recursive walk. App children are guarded INLINE at the call site + -- (lbr check, then straight into the walk): a no-op subtree produces + -- NO query row at all, and a real-work child produces one walk row + -- instead of a dispatch row + a walk row — 35% of all expr_inst1 + -- entries on instantiation-heavy checks were no-op dispatch rows. + -- `expr_lbr` is keyed on the child alone, so the guard memoizes + -- across arg/depth. App-arm only: guarding the binder arms too blew + -- the walk width 53 -> 141 and REGRESSED FFT +4%. fn expr_inst1_walk(e: KExpr, arg: KExpr, depth: G) -> KExpr { match load(e) { KExprNode.BVar(i) => @@ -214,12 +222,18 @@ def subst := ⟦ _ => store(KExprNode.BVar(i - 1)), }, }, - KExprNode.Srt(l) => store(KExprNode.Srt(l)), - KExprNode.Const(idx, lvls) => store(KExprNode.Const(idx, lvls)), + KExprNode.Srt(_) => e, + KExprNode.Const(_, _) => e, KExprNode.App(f, a) => - store(KExprNode.App( - expr_inst1(f, arg, depth), - expr_inst1(a, arg, depth))), + let f2 = match u32_less_than(depth, expr_lbr(f)) { + 0 => f, + 1 => expr_inst1_walk(f, arg, depth), + }; + let a2 = match u32_less_than(depth, expr_lbr(a)) { + 0 => a, + 1 => expr_inst1_walk(a, arg, depth), + }; + store(KExprNode.App(f2, a2)), KExprNode.Lam(ty, body) => store(KExprNode.Lam( expr_inst1(ty, arg, depth), @@ -233,7 +247,7 @@ def subst := ⟦ expr_inst1(ty, arg, depth), expr_inst1(val, arg, depth), expr_inst1(body, arg, depth + 1))), - KExprNode.Lit(lit) => store(KExprNode.Lit(lit)), + KExprNode.Lit(_) => e, KExprNode.Proj(tidx, fidx, e1) => store(KExprNode.Proj(tidx, fidx, expr_inst1(e1, arg, depth))), } diff --git a/Ix/IxVM/Kernel/Whnf.lean b/Ix/IxVM/Kernel/Whnf.lean index d3e76b69..93348a0e 100644 --- a/Ix/IxVM/Kernel/Whnf.lean +++ b/Ix/IxVM/Kernel/Whnf.lean @@ -56,6 +56,15 @@ def whnf := ⟦ -- ============================================================================ -- Beta + -- + -- One argument per step, NOT a simultaneous multi-arg substitution: a + -- measured choice. A simultaneous `instantiate_rev`-style peel was + -- tried (2026-06-10) and REGRESSED FFT ~3%: recursor betas share a + -- constant argument prefix across iterations (motive/base/step), so + -- the sequential chain's per-arg memo keys hit across every iteration, + -- while a combined args-list key is unique per iteration and re-walks + -- the whole body each time. Curried beats tupled under Aiur's + -- content-memoization. -- ============================================================================ -- Peel a beta telescope: consume one spine arg per leading `Lam` of `lam`, @@ -164,82 +173,6 @@ def whnf := ⟦ } } - -- If `head` is a Nat primitive (`Nat.add` / `Nat.div` / `Nat.mod`) applied to - -- exactly (non-literal base, literal second arg), return (1, the same op in - -- canonical form) so whnf leaves it STUCK instead of delta-unfolding it. This - -- stops `Nat.add x n` from materializing succ^n(x), and `Nat.div`/`Nat.mod x n` - -- (n ≥ 2) from expanding the division algorithm — both are irreducible for a - -- symbolic base, so the compact form IS the normal form. `Nat.shiftRight x k` - -- unfolds to k nested `Nat.div _ 2`, which now stay stuck. Thresholds: `add` - -- keeps any nonzero n; `div`/`mod` keep n ≥ 2 (so `x/1 = x`, `x/0 = 0` still - -- reduce). All magnitudes stay KLimbs. `(0, _)` = "not this shape". - fn try_nat_offset_stuck(head: KExpr, spine: List‹KExpr›, types: List‹KExpr›, - top: List‹&KConstantInfo›, addrs: List‹Addr›) -> (G, KExpr) { - match load(head) { - KExprNode.Const(idx, _) => - let ca = list_lookup(addrs, idx); - let is_add = address_eq(ca, nat_add_addr()); - let is_divmod = address_eq(ca, nat_div_addr()) + address_eq(ca, nat_mod_addr()); - match is_add + is_divmod { - 0 => (0, store(KExprNode.BVar(0))), - _ => - match list_length(spine) { - 2 => - let a0_w = whnf(list_lookup(spine, 0), types, top, addrs); - let a1_w = whnf(list_lookup(spine, 1), types, top, addrs); - match try_extract_nat(a1_w, addrs) { - (0, _) => (0, store(KExprNode.BVar(0))), - (1, n) => - -- reject n=0 (all ops) and n=1 (div/mod only). - let bad = klimbs_is_zero(n) + is_divmod * klimbs_is_zero(klimbs_dec(n)); - match bad { - 0 => - match try_extract_nat(a0_w, addrs) { - (1, _) => (0, store(KExprNode.BVar(0))), - (0, _) => - (1, store(KExprNode.App( - store(KExprNode.App(head, a0_w)), - mk_nat_lit(n)))), - }, - _ => (0, store(KExprNode.BVar(0))), - }, - }, - _ => (0, store(KExprNode.BVar(0))), - }, - }, - _ => (0, store(KExprNode.BVar(0))), - } - } - - -- The address-keyed primitive gauntlet: nat → str → bitvec → native → - -- decidable, in order. Returns (1, reduced) on the first family that matches - -- and reduces, else (0, _). Factored out so `prim_any_addr` can gate the - -- whole chain for non-primitive heads in one shot. - fn try_address_primitives(head_addr: Addr, idx: G, lvls: List‹&KLevel›, - spine: List‹KExpr›, types: List‹KExpr›, - top: List‹&KConstantInfo›, addrs: List‹Addr›) -> (G, KExpr) { - match try_nat_dispatch(head_addr, spine, types, top, addrs) { - (1, r) => (1, r), - (0, _) => - match try_str_dispatch(head_addr, spine, addrs) { - (1, r) => (1, r), - (0, _) => - match try_bitvec_dispatch(head_addr, spine, types, top, addrs) { - (1, r) => (1, r), - (0, _) => - match try_reduce_native(head_addr, spine, types, top, addrs) { - (1, r) => (1, r), - (0, _) => - match try_reduce_decidable(head_addr, idx, lvls, spine, types, top, addrs) { - (1, r) => (1, r), - (0, _) => (0, store(KExprNode.BVar(0))), - }, - }, - }, - }, - } - } - -- Const-head WHNF dispatch, split out of `whnf_with_spine` (see its Const arm). -- `head` is the original `Const(idx, lvls)` KExpr, passed for the stuck -- `apply_spine(head, spine)` fallbacks. @@ -264,40 +197,61 @@ def whnf := ⟦ (0, _) => apply_spine(head, spine), }, _ => - -- Skip the address-primitive gauntlet for the common non-primitive - -- head: `prim_any_addr` (memoized per head address) is 1 only for the - -- ~43 ops the gauntlet recognizes, so a 0 means no `try_*` can match — - -- go straight to projection-definition / delta. Validated by a - -- differential assert (now removed) over the suite + heavy consts. - let addr_prim = match prim_any_addr(head_addr) { - 1 => try_address_primitives(head_addr, idx, lvls, spine, types, top, addrs), + -- Family-gated primitive dispatch. `prim_family` is keyed on the + -- head ADDRESS alone, so it memoizes to one row per distinct + -- constant address in the run; at most ONE family reducer is + -- then called. The previous form ran every reducer as a gauntlet + -- (nat -> str -> bitvec -> native -> decidable), charging 4-6 + -- wide rows per Const-head whnf for guaranteed misses — the + -- families' address sets are mutually disjoint, so the gauntlet + -- order never mattered. A family-reducer miss (e.g. `Nat.add` on + -- non-literal args) falls through to proj-def/delta exactly as + -- the gauntlet's final arm did. + let fam = prim_family(head_addr); + let attempt = match fam { + 1 => try_nat_dispatch(head_addr, spine, types, top, addrs), + 2 => try_str_dispatch(head_addr, spine, addrs), + 3 => try_bitvec_dispatch(head_addr, spine, types, top, addrs), + 4 => try_reduce_native(head_addr, spine, types, top, addrs), + 5 => try_reduce_decidable(head_addr, idx, lvls, spine, types, top, addrs), _ => (0, store(KExprNode.BVar(0))), }; - match addr_prim { + match attempt { (1, reduced) => whnf(reduced, types, top, addrs), + -- verdict 2: the reducer normalized the term to an already-stuck + -- compact form (symbolic Nat offset); return it WITHOUT re-whnf, + -- which would loop. + (2, stuck) => stuck, (0, _) => - let proj_def_pair = try_reduce_projection_definition(idx, spine, top); - match proj_def_pair { - (1, reduced_pd) => whnf(reduced_pd, types, top, addrs), - (0, _) => - -- Mirror src/ix/kernel/whnf.rs:756-774 (`delta_unfold_one`): - -- unfold any Defn regardless of `ReducibilityHints`. - match ci { - KConstantInfo.Defn(_, _, value, _, _) => - -- Keep `Nat.add base (Lit n)` (symbolic base) stuck as a - -- compact offset instead of delta-unfolding into a - -- succ^n tower. Pairs with offset-aware def-eq. - match try_nat_offset_stuck(head, spine, types, top, addrs) { - (1, stuck) => stuck, - (0, _) => - let body = expr_inst_levels(value, lvls); - whnf_with_spine(body, spine, types, top, addrs), - }, - KConstantInfo.Thm(_, _, _) => apply_spine(head, spine), - _ => apply_spine(head, spine), - }, + -- gate on the idx-keyed proj-def classification (memoized per + -- constant): only real projection-definitions pay the + -- spine-keyed apply row + let proj_def_pair = match is_proj_def(idx, top) { + 1 => try_reduce_projection_definition(idx, spine, top), + _ => (0, store(KExprNode.BVar(0))), + }; + match proj_def_pair { + (1, reduced_pd) => whnf(reduced_pd, types, top, addrs), + (0, _) => + -- Mirror src/ix/kernel/whnf.rs:756-774 + -- (`delta_unfold_one`): unfold any Defn + -- regardless of `ReducibilityHints`. The + -- hint is consulted by lazy-delta's + -- `delta_rank` for def-eq priority, not + -- as a gate on plain whnf delta. Without + -- unfolding here, ctor field types + -- written via opaque defs (e.g. + -- `constType (n α) (n α)`) stay stuck + -- and `check_positivity_aug` misclassifies. + match ci { + KConstantInfo.Defn(_, _, value, _, _) => + let body = expr_inst_levels(value, lvls); + whnf_with_spine(body, spine, types, top, addrs), + KConstantInfo.Thm(_, _, _) => apply_spine(head, spine), + _ => apply_spine(head, spine), }, }, + }, } } @@ -557,20 +511,7 @@ def whnf := ⟦ -- value in the same compact `base + n` form a literal already -- has, so def-eq converges instead of descending n unary -- succ layers (the UTF-8 `x + 0xC0` pathology). - (0, _) => - match klimbs_is_zero(n_klimbs) { - 1 => (1, apply_spine(base_w, post)), - 0 => - match find_addr_idx_safe(nat_add_addr(), addrs, 0) { - (0, _) => (0, store(KExprNode.BVar(0))), - (1, add_idx) => - let add_const = store(KExprNode.Const(add_idx, store(ListNode.Nil))); - let off = store(KExprNode.App( - store(KExprNode.App(add_const, base_w)), - mk_nat_lit(n_klimbs))); - (1, apply_spine(off, post)), - }, - }, + (0, _) => mk_nat_offset_stuck(base_w, n_klimbs, post, addrs), (1, b_klimbs) => (1, apply_spine(mk_nat_lit(klimbs_add(b_klimbs, n_klimbs)), post)), }, diff --git a/Ix/IxVM/KernelTypes.lean b/Ix/IxVM/KernelTypes.lean index b4a57b34..3d511edd 100644 --- a/Ix/IxVM/KernelTypes.lean +++ b/Ix/IxVM/KernelTypes.lean @@ -68,33 +68,6 @@ def kernelTypes := ⟦ } } - -- ============================================================================ - -- Values (NbE semantic domain) - -- ============================================================================ - - enum KValNode { - Srt(&KLevel), - Lit(KLiteral), - Lam(KVal, KExpr, KValEnv), - Pi(KVal, KExpr, KValEnv), - Ctor(G, List‹&KLevel›, G, List‹KVal›), - FVar(G, KVal, List‹KVal›), - Axiom(G, List‹&KLevel›, List‹KVal›), - Defn(G, List‹&KLevel›, List‹KVal›), - Thm(G, List‹&KLevel›, List‹KVal›), - Opaque(G, List‹&KLevel›, List‹KVal›), - Quot(G, List‹&KLevel›, List‹KVal›), - Induct(G, List‹&KLevel›, List‹KVal›), - Rec(G, List‹&KLevel›, List‹KVal›), - Proj(G, G, KVal, List‹KVal›), - Thunk(KExpr, KValEnv) - } - - type KVal = &KValNode - - -- Value environment (de Bruijn indexed, front = most recent binder) - type KValEnv = List‹KVal› - -- ============================================================================ -- Recursor Rule -- diff --git a/Main.lean b/Main.lean index e72ae1e8..5f70ba71 100644 --- a/Main.lean +++ b/Main.lean @@ -1,5 +1,6 @@ --import Ix.Cli.StoreCmd import Ix.Cli.AddrOfCmd +import Ix.Cli.RefsOfCmd import Ix.Cli.CheckCmd import Ix.Cli.CheckRsCmd import Ix.Cli.ClaimCmd @@ -34,6 +35,7 @@ def ixCmd : Cli.Cmd := `[Cli| shardCmd; verifyCmd; addrOfCmd; + refsOfCmd; ingressCmd; validateCmd; serveCmd; diff --git a/src/aiur/execute.rs b/src/aiur/execute.rs index e28dae6e..c724f9b9 100644 --- a/src/aiur/execute.rs +++ b/src/aiur/execute.rs @@ -15,12 +15,165 @@ use crate::{ }, }; -pub struct QueryResult { - pub(crate) output: Vec, +/// Immutable view of one query entry. +#[derive(Clone, Copy)] +pub struct QueryRef<'a> { + pub(crate) output: &'a [G], pub(crate) multiplicity: G, } -pub type QueryMap = FxIndexMap, QueryResult>; +/// Mutable view of one query entry: the output is fixed at insertion, +/// only the multiplicity is bumped on memo hits. +pub struct QueryRefMut<'a> { + pub(crate) output: &'a [G], + pub(crate) multiplicity: &'a mut G, +} + +fn hash_g_slice(key: &[G]) -> u64 { + use std::hash::Hasher; + let mut h = rustc_hash::FxHasher::default(); + for g in key { + h.write_u64(g.as_canonical_u64()); + } + h.finish() +} + +/// Append-only query store with a hash index. +/// +/// Functionally the insertion-ordered map `args -> (output, multiplicity)` +/// it replaces (`FxIndexMap, QueryResult>`) — but every circuit has +/// a FIXED key arity and output width, so keys and outputs live in flat +/// `Vec` arenas addressed by entry index, and the hash table holds only +/// `u32` indices. This cuts per-entry overhead from ~130 B (two heap +/// `Vec`s + IndexMap bucket + allocator metadata) to the raw field +/// elements plus ~13 B of index. The record IS the proof witness, so +/// entries cannot be dropped — only stored compactly; on kernel-heavy +/// executions it is the dominant RAM consumer (billions of entries). +/// +/// Entry index == insertion order; memory circuits use it as the pointer +/// value, mirroring the old `IndexMap::get_index_of` semantics. +pub struct QueryMap { + key_stride: usize, + /// Output width; inferred on first insert (not statically available in + /// `FunctionLayout`). + out_stride: usize, + keys: Vec, + outs: Vec, + mults: Vec, + table: hashbrown::HashTable, +} + +impl QueryMap { + pub fn new(key_stride: usize) -> Self { + Self { + key_stride, + out_stride: 0, + keys: Vec::new(), + outs: Vec::new(), + mults: Vec::new(), + table: hashbrown::HashTable::new(), + } + } + + #[inline] + pub fn len(&self) -> usize { + self.mults.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.mults.is_empty() + } + + /// Total retained field elements (keys + outputs); used by the + /// `IX_AIUR_QUERY_STATS` RAM-attribution dump. + pub fn retained_elems(&self) -> usize { + self.keys.len() + self.outs.len() + } + + #[inline] + fn key_at(&self, i: usize) -> &[G] { + &self.keys[i * self.key_stride..(i + 1) * self.key_stride] + } + + #[inline] + fn out_at(&self, i: usize) -> &[G] { + &self.outs[i * self.out_stride..(i + 1) * self.out_stride] + } + + pub fn get_index_of(&self, key: &[G]) -> Option { + debug_assert_eq!(key.len(), self.key_stride); + let hash = hash_g_slice(key); + self + .table + .find(hash, |&i| self.key_at(i as usize) == key) + .map(|&i| i as usize) + } + + pub fn get(&self, key: &[G]) -> Option> { + let i = self.get_index_of(key)?; + Some(QueryRef { output: self.out_at(i), multiplicity: self.mults[i] }) + } + + pub fn get_mut(&mut self, key: &[G]) -> Option> { + let i = self.get_index_of(key)?; + let output = &self.outs[i * self.out_stride..(i + 1) * self.out_stride]; + Some(QueryRefMut { output, multiplicity: &mut self.mults[i] }) + } + + /// Append a new entry. The key must not already be present: call sites + /// only insert on a confirmed miss, and a same-key re-entrant call + /// would loop forever before reaching its own insert. + pub fn insert(&mut self, key: &[G], output: &[G], multiplicity: G) { + debug_assert_eq!(key.len(), self.key_stride); + debug_assert!(self.get_index_of(key).is_none()); + if self.mults.is_empty() { + self.out_stride = output.len(); + } else { + debug_assert_eq!(output.len(), self.out_stride); + } + let hash = hash_g_slice(key); + let i = u32::try_from(self.mults.len()).expect("query map overflow"); + self.keys.extend_from_slice(key); + self.outs.extend_from_slice(output); + self.mults.push(multiplicity); + let keys = &self.keys; + let stride = self.key_stride; + self.table.insert_unique(hash, i, |&j| { + let j = j as usize; + hash_g_slice(&keys[j * stride..(j + 1) * stride]) + }); + } + + /// Entry at insertion index `i`: the key slice plus a mutable handle on + /// the multiplicity (memory `Load` bumps the pointed-to row's count). + pub fn get_index_mut(&mut self, i: usize) -> Option<(&[G], &mut G)> { + if i >= self.mults.len() { + return None; + } + let key = &self.keys[i * self.key_stride..(i + 1) * self.key_stride]; + Some((key, &mut self.mults[i])) + } + + pub fn get_index(&self, i: usize) -> Option<(&[G], QueryRef<'_>)> { + if i >= self.len() { + return None; + } + Some(( + self.key_at(i), + QueryRef { output: self.out_at(i), multiplicity: self.mults[i] }, + )) + } + + pub fn iter(&self) -> impl Iterator)> { + (0..self.len()).map(|i| { + ( + self.key_at(i), + QueryRef { output: self.out_at(i), multiplicity: self.mults[i] }, + ) + }) + } +} pub struct QueryRecord { pub(crate) function_queries: Vec, @@ -31,12 +184,15 @@ pub struct QueryRecord { impl QueryRecord { fn new(toplevel: &Toplevel) -> Self { - let function_queries = - toplevel.functions.iter().map(|_| QueryMap::default()).collect(); + let function_queries = toplevel + .functions + .iter() + .map(|f| QueryMap::new(f.layout.input_size)) + .collect(); let memory_queries = toplevel .memory_sizes .iter() - .map(|width| (*width, QueryMap::default())) + .map(|width| (*width, QueryMap::new(*width))) .collect(); let bytes1_queries = Bytes1Queries::new(); let bytes2_queries = Bytes2Queries::new(); @@ -158,6 +314,40 @@ impl std::fmt::Display for ExecError { impl std::error::Error for ExecError {} +/// Gated by `IX_AIUR_QUERY_STATS=1`: dump per-function query-map sizes +/// during/after execution so RAM blowups can be attributed to specific +/// Aiur functions (indices resolve to names via the Lean +/// `CompiledToplevel`). Heaviest maps first, by retained G elements. +static QUERY_STATS: std::sync::LazyLock = std::sync::LazyLock::new( + || std::env::var_os("IX_AIUR_QUERY_STATS").is_some(), +); + +fn dump_query_stats(record: &QueryRecord, tag: &str) { + let mut rows: Vec<(usize, usize, usize)> = record + .function_queries + .iter() + .enumerate() + .map(|(i, m)| (i, m.len(), m.retained_elems())) + .filter(|(_, n, _)| *n > 0) + .collect(); + rows.sort_by(|a, b| b.2.cmp(&a.2)); + let total_entries: usize = rows.iter().map(|r| r.1).sum(); + let total_elems: usize = rows.iter().map(|r| r.2).sum(); + eprintln!( + "[aiur-stats {tag}] function_queries: {total_entries} entries, \ + {total_elems} G-elems; top maps:" + ); + for (i, n, e) in rows.iter().take(30) { + eprintln!(" fn{i:<4} entries={n:<12} g_elems={e}"); + } + let mem: Vec = record + .memory_queries + .iter() + .map(|(w, m)| format!("w{w}={}", m.len())) + .collect(); + eprintln!("[aiur-stats {tag}] memory entries: {}", mem.join(" ")); +} + impl Toplevel { pub fn execute( &self, @@ -172,6 +362,9 @@ impl Toplevel { let function = &self.functions[fun_idx]; let output = function.execute(fun_idx, args, self, &mut record, io_buffer)?; + if *QUERY_STATS { + dump_query_stats(&record, "final"); + } Ok((record, output)) } } @@ -213,7 +406,14 @@ impl Function { } push_block_exec_entries!(&self.body); let mut unconstrained = false; + let mut stats_ops: u64 = 0; while let Some(exec_entry) = exec_entries_stack.pop() { + if *QUERY_STATS { + stats_ops += 1; + if stats_ops.is_multiple_of(1 << 31) { + dump_query_stats(record, &format!("ops={stats_ops}")); + } + } match exec_entry { ExecEntry::Op(Op::Const(c)) => map.push(*c), ExecEntry::Op(Op::Add(a, b)) => { @@ -236,14 +436,14 @@ impl Function { map.push(G::from_bool(a == G::ZERO)); }, ExecEntry::Op(Op::Call(callee_idx, args, _, op_unconstrained)) => { - let args = args.iter().map(|i| map[*i]).collect(); + let args: Vec = args.iter().map(|i| map[*i]).collect(); if let Some(result) = record.function_queries[*callee_idx].get_mut(&args) { if !unconstrained && !op_unconstrained { - result.multiplicity += G::ONE; + *result.multiplicity += G::ONE; } - map.extend(result.output.clone()); + map.extend_from_slice(result.output); } else { let saved_map = std::mem::replace(&mut map, args); callers_states_stack.push(CallerState { @@ -266,16 +466,16 @@ impl Function { .ok_or(ExecError::InvalidMemorySize(size))?; if let Some(result) = memory_queries.get_mut(&values) { if !unconstrained { - result.multiplicity += G::ONE; + *result.multiplicity += G::ONE; } - map.extend(&result.output); + map.extend_from_slice(result.output); } else { let ptr = G::from_usize(memory_queries.len()); - let result = QueryResult { - output: vec![ptr], - multiplicity: G::from_bool(!unconstrained), - }; - memory_queries.insert(values, result); + memory_queries.insert( + &values, + &[ptr], + G::from_bool(!unconstrained), + ); map.push(ptr); } }, @@ -289,13 +489,13 @@ impl Function { let ptr_usize = usize::try_from(ptr_u64) .ok() .ok_or(ExecError::PointerTooLarge(ptr_u64))?; - let (args, result) = memory_queries + let (args, multiplicity) = memory_queries .get_index_mut(ptr_usize) .ok_or(ExecError::UnboundPointer { ptr: ptr_u64, size: *size })?; if !unconstrained { - result.multiplicity += G::ONE; + *multiplicity += G::ONE; } - map.extend(args); + map.extend_from_slice(args); }, ExecEntry::Op(Op::AssertEq(xs, ys)) => { if xs.len() != ys.len() { @@ -542,13 +742,12 @@ impl Function { ExecEntry::Ctrl(Ctrl::Return(_, output)) => { // Register the query. let input_size = toplevel.functions[fun_idx].layout.input_size; - let args = map[..input_size].to_vec(); let output = output.iter().map(|i| map[*i]).collect::>(); - let result = QueryResult { - output: output.clone(), - multiplicity: G::from_bool(!unconstrained), - }; - record.function_queries[fun_idx].insert(args, result); + record.function_queries[fun_idx].insert( + &map[..input_size], + &output, + G::from_bool(!unconstrained), + ); if let Some(CallerState { fun_idx: caller_idx, map: caller_map, diff --git a/src/aiur/memory.rs b/src/aiur/memory.rs index fa676394..e2a90d4d 100644 --- a/src/aiur/memory.rs +++ b/src/aiur/memory.rs @@ -7,7 +7,7 @@ use multi_stark::{ }; use rayon::{ iter::{ - IndexedParallelIterator, IntoParallelRefIterator, + IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator, }, slice::ParallelSliceMut, @@ -69,10 +69,10 @@ impl Memory { rows_no_padding .par_chunks_mut(width) - .zip(queries.par_iter()) .zip(lookups_no_padding.par_iter_mut()) .enumerate() - .for_each(|(i, ((row, (values, result)), row_lookups))| { + .for_each(|(i, (row, row_lookups))| { + let (values, result) = queries.get_index(i).expect("index in range"); row[0] = result.multiplicity; row[1] = G::ONE; row[2] = G::from_usize(i);