From 7d89c796cdd9ac6f974cc923f115662d9ab6ee77 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 14:40:28 -0300
Subject: [PATCH 01/28] multi-stark proof deserializer

---
 Ix/MultiStark.lean             |  45 ++++
 Ix/MultiStark/Deserialize.lean | 385 +++++++++++++++++++++++++++++++++
 2 files changed, 430 insertions(+)
 create mode 100644 Ix/MultiStark.lean
 create mode 100644 Ix/MultiStark/Deserialize.lean
diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
new file mode 100644
index 00000000..9526ae4d
--- /dev/null
+++ b/Ix/MultiStark.lean
@@ -0,0 +1,45 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.IxVM.Core
+public import Ix.IxVM.ByteStream
+public import Ix.MultiStark.Deserialize
+
+/-!
+# Multi-STARK proof verifier (Aiur)
+
+Step 1 towards a recursive verifier: an Aiur entrypoint scaffold that
+deserializes a `multi_stark::prover::Proof` (see `Ix/MultiStark/Deserialize.lean`).
+
+The deserializer is the wire-format reader; the actual verification logic will
+hang off `read_proof`. For now the entrypoint builds the `Proof` object and
+discards it.
+-/
+
+public section
+
+namespace MultiStark
+
+def entrypoints := ⟦
+  -- Read the proof bytes non-deterministically from IO key `[0]`, deserialize
+  -- into a `Proof` object, and assert the whole stream was consumed. The proof
+  -- is then discarded — actual verification logic will hang off `read_proof`.
+  pub fn verify_multi_stark_proof() {
+    let (idx, len) = io_get_info([0]);
+    let bytes = #read_byte_stream(idx, len);
+    let (_proof, rest) = read_proof(bytes);
+    assert_eq!(load(rest), ListNode.Nil);
+    ()
+  }
+⟧
+
+/-- The standalone Multi-STARK verifier toplevel: `core` (lists/options) +
+`byteStream` (`U64`, `flatten_u64`, `read_byte_stream`, …) + the deserializer
+and entrypoint. -/
+def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
+  let t ← IxVM.core.merge IxVM.byteStream
+  let t ← t.merge deserialize
+  t.merge entrypoints
+
+end MultiStark
+
+end
diff --git a/Ix/MultiStark/Deserialize.lean b/Ix/MultiStark/Deserialize.lean
new file mode 100644
index 00000000..2108a5c6
--- /dev/null
+++ b/Ix/MultiStark/Deserialize.lean
@@ -0,0 +1,385 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.IxVM.Core
+public import Ix.IxVM.ByteStream
+
+/-!
+# Multi-STARK proof deserializer (Aiur)
+
+Aiur deserializer for `multi_stark::prover::Proof`, mirroring the hand-written
+bincode reader in `multi-stark/src/manual_codec.rs` (`fn deserialize`). Only
+the deserializer is ported (no serializer).
+
+The wire format is bincode `standard().with_little_endian().with_fixed_int_encoding()`:
+
+* every `Vec` / length prefix : `u64`, 8 bytes little-endian
+* `Option`                    : 1 tag byte (`0` = None, `1` = Some), then value
+* `u8`                        : 1 byte
+* struct                      : fields in declaration order, no framing
+* `[T; N]` / tuple            : N elements back-to-back, no length prefix
+* `Val` (Goldilocks)          : raw `u64`, 8 bytes LE (NOT reduced mod p)
+* `ExtVal` (deg-2 ext)        : `[u64; 2]`, 16 bytes LE
+* Merkle digest               : `[u64; 4]`, 32 bytes LE
+
+`read_proof` builds a `Proof` object; `Ix/MultiStark.lean` hangs the entrypoint
+(and eventual verification logic) off it.
+-/
+
+public section
+
+namespace MultiStark
+
+def deserialize := ⟦
+  -- ==========================================================================
+  -- Wire-level type mirrors of `multi-stark/src/manual_codec.rs`.
+  -- `U64`/`ByteStream` come from `IxVM.byteStream`; raw Goldilocks `Val` is the
+  -- non-canonical `u64`, kept here as the 8 little-endian bytes (`U64`).
+  -- ==========================================================================
+
+  -- `ExtVal = BinomialExtensionField<Goldilocks, 2>` -> `[u64; 2]`.
+  type Ext = [U64; 2]
+
+  -- A Merkle digest: `[u64; DIGEST_ELEMS]` with `DIGEST_ELEMS = 4`.
+  type Digest = [U64; 4]
+
+  -- `Commitment = MerkleCap<..>`: only the `cap: Vec<Digest>` is on the wire.
+  type MerkleCap = List‹Digest›
+
+  -- `OpenedValuesForRound<ExtVal> = Vec<Vec<Vec<ExtVal>>>`.
+  type OpenedRound = List‹List‹List‹Ext›››
+
+  -- The three trace/quotient commitments at the head of a `Proof`.
+  enum Commitments {
+    Mk(MerkleCap, MerkleCap, MerkleCap)
+  }
+
+  -- `BatchOpening<Val, Mmcs>`: per-round input opening of a FRI query.
+  --   opened_values : Vec<Vec<Val>>   (one row of base-field values per matrix)
+  --   opening_proof : Vec<Digest>     (Merkle authentication path)
+  enum BatchOpening {
+    Mk(List‹List‹U64››, List‹Digest›)
+  }
+
+  -- `CommitPhaseProofStep<ExtVal, ExtMmcs>`: one folding step of a FRI query.
+  --   log_arity      : u8
+  --   sibling_values : Vec<Ext>  (arity - 1 siblings)
+  --   opening_proof  : Vec<Digest>
+  enum CommitPhaseProofStep {
+    Mk(U8, List‹Ext›, List‹Digest›)
+  }
+
+  -- `QueryProof<ExtVal, ExtMmcs, Vec<BatchOpening<Val, Mmcs>>>`.
+  enum QueryProof {
+    Mk(List‹BatchOpening›, List‹CommitPhaseProofStep›)
+  }
+
+  -- `PcsProof = FriProof<ExtVal, ExtMmcs, Witness = Val, ..>`.
+  --   commit_phase_commits : Vec<MerkleCap>
+  --   commit_pow_witnesses : Vec<Val>
+  --   query_proofs         : Vec<QueryProof>
+  --   final_poly           : Vec<Ext>
+  --   query_pow_witness    : Val
+  enum FriProof {
+    Mk(List‹MerkleCap›, List‹U64›, List‹QueryProof›, List‹Ext›, U64)
+  }
+
+  -- `Option<OpenedRound>` for `preprocessed_opened_values`. A dedicated
+  -- non-generic enum keeps constructor inference unambiguous.
+  enum PreprocessedOpt {
+    NoPreprocessed,
+    SomePreprocessed(OpenedRound)
+  }
+
+  -- Mirror of `multi_stark::prover::Proof`, in serialization order.
+  enum Proof {
+    Mk(
+      Commitments,        -- commitments
+      List‹Ext›,          -- intermediate_accumulators
+      List‹U8›,           -- log_degrees
+      FriProof,           -- opening_proof
+      OpenedRound,        -- quotient_opened_values
+      PreprocessedOpt,    -- preprocessed_opened_values
+      OpenedRound,        -- stage_1_opened_values
+      OpenedRound         -- stage_2_opened_values
+    )
+  }
+
+  -- ==========================================================================
+  -- Byte-reading primitives. Every reader threads the remaining `ByteStream`.
+  -- ==========================================================================
+
+  fn read_u8(stream: ByteStream) -> (U8, ByteStream) {
+    match load(stream) {
+      ListNode.Cons(byte, rest) => (byte, rest),
+      ListNode.Nil => (0u8, store(ListNode.Nil)),
+    }
+  }
+
+  -- One raw `u64`: 8 little-endian bytes, kept as `U64` (no field reduction).
+  fn read_u64(stream: ByteStream) -> (U64, ByteStream) {
+    let (b0, s0) = read_u8(stream);
+    let (b1, s1) = read_u8(s0);
+    let (b2, s2) = read_u8(s1);
+    let (b3, s3) = read_u8(s2);
+    let (b4, s4) = read_u8(s3);
+    let (b5, s5) = read_u8(s4);
+    let (b6, s6) = read_u8(s5);
+    let (b7, s7) = read_u8(s6);
+    ([b0, b1, b2, b3, b4, b5, b6, b7], s7)
+  }
+
+  -- A bincode length prefix: a fixed-width `u64`, flattened to a field count.
+  -- `flatten_u64` asserts the top byte is zero (< 2^56), always true for the
+  -- list lengths in a proof.
+  fn read_count(stream: ByteStream) -> (G, ByteStream) {
+    let (val, s) = read_u64(stream);
+    (flatten_u64(val), s)
+  }
+
+  -- `ExtVal` -> `[u64; 2]`, no length prefix.
+  fn read_ext(stream: ByteStream) -> (Ext, ByteStream) {
+    let (a, s0) = read_u64(stream);
+    let (b, s1) = read_u64(s0);
+    ([a, b], s1)
+  }
+
+  -- Merkle digest -> `[u64; 4]`, no length prefix.
+  fn read_digest(stream: ByteStream) -> (Digest, ByteStream) {
+    let (a, s0) = read_u64(stream);
+    let (b, s1) = read_u64(s0);
+    let (c, s2) = read_u64(s1);
+    let (d, s3) = read_u64(s2);
+    ([a, b, c, d], s3)
+  }
+
+  -- ==========================================================================
+  -- Length-prefixed `Vec<T>` readers. First-order, so one pair per element
+  -- type: `read_T_vec` reads the `u64` length then loops `read_T_vec_n`.
+  -- ==========================================================================
+
+  fn read_u8_vec(stream: ByteStream) -> (List‹U8›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_u8_vec_n(s, n)
+  }
+  fn read_u8_vec_n(stream: ByteStream, n: G) -> (List‹U8›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_u8(stream);
+        let (rest, s2) = read_u8_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_u64_vec(stream: ByteStream) -> (List‹U64›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_u64_vec_n(s, n)
+  }
+  fn read_u64_vec_n(stream: ByteStream, n: G) -> (List‹U64›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_u64(stream);
+        let (rest, s2) = read_u64_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_ext_vec(stream: ByteStream) -> (List‹Ext›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_ext_vec_n(s, n)
+  }
+  fn read_ext_vec_n(stream: ByteStream, n: G) -> (List‹Ext›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_ext(stream);
+        let (rest, s2) = read_ext_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_digest_vec(stream: ByteStream) -> (List‹Digest›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_digest_vec_n(s, n)
+  }
+  fn read_digest_vec_n(stream: ByteStream, n: G) -> (List‹Digest›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_digest(stream);
+        let (rest, s2) = read_digest_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  -- `Vec<Vec<Val>>` for `BatchOpening.opened_values`.
+  fn read_u64_vec_vec(stream: ByteStream) -> (List‹List‹U64››, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_u64_vec_vec_n(s, n)
+  }
+  fn read_u64_vec_vec_n(stream: ByteStream, n: G) -> (List‹List‹U64››, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_u64_vec(stream);
+        let (rest, s2) = read_u64_vec_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  -- `OpenedRound = Vec<Vec<Vec<Ext>>>`, built bottom-up from `read_ext_vec`.
+  fn read_ext_vec_vec(stream: ByteStream) -> (List‹List‹Ext››, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_ext_vec_vec_n(s, n)
+  }
+  fn read_ext_vec_vec_n(stream: ByteStream, n: G) -> (List‹List‹Ext››, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_ext_vec(stream);
+        let (rest, s2) = read_ext_vec_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_opened_round(stream: ByteStream) -> (OpenedRound, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_opened_round_n(s, n)
+  }
+  fn read_opened_round_n(stream: ByteStream, n: G) -> (OpenedRound, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_ext_vec_vec(stream);
+        let (rest, s2) = read_opened_round_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  -- ==========================================================================
+  -- Struct readers (fields in declaration order, no framing).
+  -- ==========================================================================
+
+  fn read_merkle_cap(stream: ByteStream) -> (MerkleCap, ByteStream) {
+    read_digest_vec(stream)
+  }
+  fn read_merkle_cap_vec(stream: ByteStream) -> (List‹MerkleCap›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_merkle_cap_vec_n(s, n)
+  }
+  fn read_merkle_cap_vec_n(stream: ByteStream, n: G) -> (List‹MerkleCap›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_merkle_cap(stream);
+        let (rest, s2) = read_merkle_cap_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_commitments(stream: ByteStream) -> (Commitments, ByteStream) {
+    let (stage_1_trace, s0) = read_merkle_cap(stream);
+    let (stage_2_trace, s1) = read_merkle_cap(s0);
+    let (quotient_chunks, s2) = read_merkle_cap(s1);
+    (Commitments.Mk(stage_1_trace, stage_2_trace, quotient_chunks), s2)
+  }
+
+  fn read_batch_opening(stream: ByteStream) -> (BatchOpening, ByteStream) {
+    let (opened_values, s0) = read_u64_vec_vec(stream);
+    let (opening_proof, s1) = read_digest_vec(s0);
+    (BatchOpening.Mk(opened_values, opening_proof), s1)
+  }
+  fn read_batch_opening_vec(stream: ByteStream) -> (List‹BatchOpening›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_batch_opening_vec_n(s, n)
+  }
+  fn read_batch_opening_vec_n(stream: ByteStream, n: G) -> (List‹BatchOpening›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_batch_opening(stream);
+        let (rest, s2) = read_batch_opening_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_commit_phase_step(stream: ByteStream) -> (CommitPhaseProofStep, ByteStream) {
+    let (log_arity, s0) = read_u8(stream);
+    let (sibling_values, s1) = read_ext_vec(s0);
+    let (opening_proof, s2) = read_digest_vec(s1);
+    (CommitPhaseProofStep.Mk(log_arity, sibling_values, opening_proof), s2)
+  }
+  fn read_commit_phase_step_vec(stream: ByteStream) -> (List‹CommitPhaseProofStep›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_commit_phase_step_vec_n(s, n)
+  }
+  fn read_commit_phase_step_vec_n(stream: ByteStream, n: G) -> (List‹CommitPhaseProofStep›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_commit_phase_step(stream);
+        let (rest, s2) = read_commit_phase_step_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_query_proof(stream: ByteStream) -> (QueryProof, ByteStream) {
+    let (input_proof, s0) = read_batch_opening_vec(stream);
+    let (commit_phase_openings, s1) = read_commit_phase_step_vec(s0);
+    (QueryProof.Mk(input_proof, commit_phase_openings), s1)
+  }
+  fn read_query_proof_vec(stream: ByteStream) -> (List‹QueryProof›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_query_proof_vec_n(s, n)
+  }
+  fn read_query_proof_vec_n(stream: ByteStream, n: G) -> (List‹QueryProof›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_query_proof(stream);
+        let (rest, s2) = read_query_proof_vec_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_fri_proof(stream: ByteStream) -> (FriProof, ByteStream) {
+    let (commit_phase_commits, s0) = read_merkle_cap_vec(stream);
+    let (commit_pow_witnesses, s1) = read_u64_vec(s0);
+    let (query_proofs, s2) = read_query_proof_vec(s1);
+    let (final_poly, s3) = read_ext_vec(s2);
+    let (query_pow_witness, s4) = read_u64(s3);
+    (FriProof.Mk(commit_phase_commits, commit_pow_witnesses, query_proofs,
+                 final_poly, query_pow_witness), s4)
+  }
+
+  -- `Option<OpenedRound>`: 1 tag byte, then the value when `Some`.
+  fn read_preprocessed(stream: ByteStream) -> (PreprocessedOpt, ByteStream) {
+    let (tag, s) = read_u8(stream);
+    match tag {
+      0 => (PreprocessedOpt.NoPreprocessed, s),
+      _ =>
+        let (r, s2) = read_opened_round(s);
+        (PreprocessedOpt.SomePreprocessed(r), s2),
+    }
+  }
+
+  -- Full `Proof` in declaration order (see `manual_codec.rs::Reader::proof`).
+  fn read_proof(stream: ByteStream) -> (Proof, ByteStream) {
+    let (commitments, s0) = read_commitments(stream);
+    let (intermediate_accumulators, s1) = read_ext_vec(s0);
+    let (log_degrees, s2) = read_u8_vec(s1);
+    let (opening_proof, s3) = read_fri_proof(s2);
+    let (quotient_opened_values, s4) = read_opened_round(s3);
+    let (preprocessed_opened_values, s5) = read_preprocessed(s4);
+    let (stage_1_opened_values, s6) = read_opened_round(s5);
+    let (stage_2_opened_values, s7) = read_opened_round(s6);
+    (Proof.Mk(commitments, intermediate_accumulators, log_degrees, opening_proof,
+              quotient_opened_values, preprocessed_opened_values,
+              stage_1_opened_values, stage_2_opened_values), s7)
+  }
+⟧
+
+end MultiStark
+
+end

From 091733af9bc86127bc702a1ce4b33bcca6401edd Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 14:48:59 -0300
Subject: [PATCH 02/28] recursive verifier entrypoint

---
 RecursiveVerifier.lean | 94 ++++++++++++++++++++++++++++++++++++++++++
 lakefile.lean          |  4 ++
 2 files changed, 98 insertions(+)
 create mode 100644 RecursiveVerifier.lean

diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
new file mode 100644
index 00000000..45d65afc
--- /dev/null
+++ b/RecursiveVerifier.lean
@@ -0,0 +1,94 @@
+import Ix.Aiur.Meta
+import Ix.Aiur.Protocol
+import Ix.Aiur.Compiler
+import Ix.Aiur.Statistics
+import Ix.MultiStark
+import Tests.Aiur.Common
+
+/-!
+# Recursive verifier end-to-end test
+
+A standalone executable that exercises the Multi-STARK verifier scaffold
+(`Ix/MultiStark.lean`) against a real proof:
+
+1. Define a tiny Aiur toplevel with a `factorial` entrypoint.
+2. Prove `factorial(5) = 120` with the Multi-STARK backend.
+3. Serialize that proof (`Proof.toBytes`, bincode) and seed it on the IO
+   channel under key `[0]` — the hint the recursive verifier reads
+   non-deterministically via `#read_byte_stream`.
+4. Run the `verify_multi_stark_proof` entrypoint over that IO buffer and
+   prove *its* execution, producing a recursive proof.
+
+Run with `lake exe recursive-verifier`.
+-/
+
+open Aiur
+
+/-- A tiny Aiur program: `factorial` as the proving entrypoint. -/
+def factorialProgram : Source.Toplevel := ⟦
+  pub fn factorial(n: G) -> G {
+    match n {
+      0 => 1,
+      _ => n * factorial(n - 1),
+    }
+  }
+⟧
+
+/-- Compile a toplevel and build its proving system, or fail with a message. -/
+def buildSystem (label : String) (top : Source.Toplevel) :
+    IO (CompiledToplevel × AiurSystem) := do
+  let compiled ← match top.compile with
+    | .error e => throw <| IO.userError s!"{label}: compilation failed: {e}"
+    | .ok c => pure c
+  pure (compiled, AiurSystem.build compiled.bytecode commitmentParameters)
+
+def main : IO UInt32 := do
+  -- ── 1. factorial system ──────────────────────────────────────────────────
+  let (facCompiled, facSystem) ← buildSystem "factorial" factorialProgram
+  let facIdx ← match facCompiled.getFuncIdx `factorial with
+    | some i => pure i
+    | none => IO.eprintln "factorial entrypoint not found"; return 1
+
+  -- ── 2. prove factorial(5) = 120 ──────────────────────────────────────────
+  -- `G` is a reserved token (the Aiur DSL), so spell the field type qualified.
+  let input := #[Aiur.G.ofNat 5]
+  let (claim, proof, _) := facSystem.prove friParameters facIdx input default
+  let expected := buildClaim facIdx input #[Aiur.G.ofNat 120]
+  if claim != expected then
+    IO.eprintln s!"unexpected factorial claim:\n  got {claim}\n  want {expected}"
+    return 1
+  IO.println s!"✓ proved factorial(5) = 120  (claim = {claim})"
+  match facSystem.verify friParameters claim (.ofBytes proof.toBytes) with
+  | .error e => IO.eprintln s!"inner proof failed to verify: {e}"; return 1
+  | .ok _ => IO.println "✓ inner proof verifies"
+
+  -- ── 3. serialize proof, seed it as the IO hint under key [0] ─────────────
+  let proofBytes := proof.toBytes
+  IO.println s!"  serialized proof: {proofBytes.size} bytes"
+  let proofGs : Array Aiur.G := proofBytes.data.map .ofUInt8
+  let verifierIO : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs
+
+  -- ── 4. recursive verifier system ─────────────────────────────────────────
+  let vTop ← match MultiStark.multiStark with
+    | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1
+    | .ok t => pure t
+  let (vCompiled, vSystem) ← buildSystem "verifier" vTop
+  let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
+    | some i => pure i
+    | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1
+
+  -- ── 5. run the verifier over the hint, producing a recursive proof ───────
+  let (vClaim, vProof, _) := vSystem.prove friParameters vIdx #[] verifierIO
+  IO.println s!"✓ recursive verifier executed  (claim = {vClaim})"
+  match vSystem.verify friParameters vClaim (.ofBytes vProof.toBytes) with
+  | .error e => IO.eprintln s!"recursive proof failed to verify: {e}"; return 1
+  | .ok _ => IO.println "✓ recursive proof verifies"
+  IO.println s!"  recursive proof: {vProof.toBytes.size} bytes"
+
+  -- ── 6. circuit statistics for the recursive proof ────────────────────────
+  IO.println "\n── recursive proof statistics ──"
+  match vCompiled.bytecode.execute vIdx #[] verifierIO with
+  | .error e => IO.eprintln s!"verifier execution failed: {e}"; return 1
+  | .ok (_, _, queryCounts) =>
+    Aiur.printStats (Aiur.computeStats vCompiled queryCounts)
+  pure 0
diff --git a/lakefile.lean b/lakefile.lean
index 5362daa8..853c8191 100644
--- a/lakefile.lean
+++ b/lakefile.lean
@@ -98,6 +98,10 @@ lean_exe «arena-exclude» where
   root := `Tests.Ix.Kernel.ArenaExclude
   supportInterpreter := true
 
+lean_exe «recursive-verifier» where
+  root := `RecursiveVerifier
+  supportInterpreter := true
+
 end Tests
 
 section Benchmarks

From 880641e7968bad681d0882c36ecb91dbbd6f0fee Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 15:49:17 -0300
Subject: [PATCH 03/28] keccak

---
 Ix/MultiStark/Keccak.lean | 343 ++++++++++++++++++++++++++++++++++++++
 KeccakCheck.lean          |  38 +++++
 lakefile.lean             |   4 +
 3 files changed, 385 insertions(+)
 create mode 100644 Ix/MultiStark/Keccak.lean
 create mode 100644 KeccakCheck.lean

diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean
new file mode 100644
index 00000000..352828f1
--- /dev/null
+++ b/Ix/MultiStark/Keccak.lean
@@ -0,0 +1,343 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.IxVM.Core
+public import Ix.IxVM.ByteStream
+
+/-!
+# Keccak-256 in Aiur
+
+The hash multi-stark uses (`p3_keccak`: `Keccak256Hash` + `KeccakF` permutation,
+keccak-f[1600] sponge). This is the Ethereum/original Keccak variant
+(`tiny_keccak::Keccak::v256`): pad `10*1` with first pad byte `0x01` and final
+`0x80`, rate = 136 bytes (17 lanes), capacity 512, 256-bit output.
+
+Representation:
+* lane  = 64-bit word = `[U8; 8]`, little-endian bytes (byte 0 = least significant).
+* state = `[Lane; 25]`, lane index `L(x,y) = x + 5*y`.
+
+Bit ops use the `u8` gadgets (`u8_xor`/`u8_and`; NOT = `u8_xor(b, 0xFF)`).
+Rotations go through `u8_bit_decomposition` (u8 → `[field; 8]`) and a generic
+bit-list rotate, recomposing bytes with `u8_from_field_unsafe`.
+
+**Scope:** single-block only — message length ≤ 135 bytes (one keccak-f call).
+Multi-block absorption is future work.
+-/
+
+public section
+
+namespace MultiStark
+
+def keccak := ⟦
+  -- A 64-bit Keccak lane: 8 little-endian bytes.
+  type Lane = [U8; 8]
+
+  -- ==========================================================================
+  -- Lane bit-logic (byte-wise u8 gadgets).
+  -- ==========================================================================
+
+  fn xor8(a: Lane, b: Lane) -> Lane {
+    [u8_xor(a[0], b[0]), u8_xor(a[1], b[1]), u8_xor(a[2], b[2]), u8_xor(a[3], b[3]),
+     u8_xor(a[4], b[4]), u8_xor(a[5], b[5]), u8_xor(a[6], b[6]), u8_xor(a[7], b[7])]
+  }
+
+  fn and8(a: Lane, b: Lane) -> Lane {
+    [u8_and(a[0], b[0]), u8_and(a[1], b[1]), u8_and(a[2], b[2]), u8_and(a[3], b[3]),
+     u8_and(a[4], b[4]), u8_and(a[5], b[5]), u8_and(a[6], b[6]), u8_and(a[7], b[7])]
+  }
+
+  -- Bitwise NOT via XOR with 0xFF (keeps the byte `u8`-typed).
+  fn not8(a: Lane) -> Lane {
+    [u8_xor(a[0], 255u8), u8_xor(a[1], 255u8), u8_xor(a[2], 255u8), u8_xor(a[3], 255u8),
+     u8_xor(a[4], 255u8), u8_xor(a[5], 255u8), u8_xor(a[6], 255u8), u8_xor(a[7], 255u8)]
+  }
+
+  -- ==========================================================================
+  -- Generic cyclic left-rotation of a lane by `n` bits (0 ≤ n < 64).
+  -- Decompose to a 64-element bit list (LSB first), reindex, recompose.
+  -- ==========================================================================
+
+  -- Prepend the 8 bits of a byte decomposition (LSB first) onto `tail`.
+  fn cons8(d: [G; 8], tail: List‹G›) -> List‹G› {
+    store(ListNode.Cons(d[0], store(ListNode.Cons(d[1], store(ListNode.Cons(d[2],
+    store(ListNode.Cons(d[3], store(ListNode.Cons(d[4], store(ListNode.Cons(d[5],
+    store(ListNode.Cons(d[6], store(ListNode.Cons(d[7], tail))))))))))))))))
+  }
+
+  -- Lane → 64 bits, index z = 8*byte + bit, LSB first.
+  fn lane_bits(l: Lane) -> List‹G› {
+    cons8(u8_bit_decomposition(l[0]),
+    cons8(u8_bit_decomposition(l[1]),
+    cons8(u8_bit_decomposition(l[2]),
+    cons8(u8_bit_decomposition(l[3]),
+    cons8(u8_bit_decomposition(l[4]),
+    cons8(u8_bit_decomposition(l[5]),
+    cons8(u8_bit_decomposition(l[6]),
+    cons8(u8_bit_decomposition(l[7]), store(ListNode.Nil)))))))))
+  }
+
+  -- x mod 64 for x in [0, 127]: drop the 64-bit (bit 6).
+  fn mod64(x: G) -> G {
+    let bits = u8_bit_decomposition(u8_from_field_unsafe(x));
+    x - 64 * bits[6]
+  }
+
+  -- Recompose a byte (as `u8`) from bits[off .. off+7] (LSB first). Indexed
+  -- access is inlined as `list_drop` (not `list_lookup`): `list_drop` recurses
+  -- on the same list pointer, so the consecutive drops here share the cached
+  -- drop-chain (far better caching than `list_lookup`).
+  fn byte_from_bits(bs: List‹G›, off: G) -> U8 {
+    -- One `list_drop` to the byte's first bit; the remaining 7 bits are the
+    -- successive tails (a deref each), so no further drops are needed.
+    let &ListNode.Cons(b0, r0) = list_drop(bs, off);
+    let &ListNode.Cons(b1, r1) = r0;
+    let &ListNode.Cons(b2, r2) = r1;
+    let &ListNode.Cons(b3, r3) = r2;
+    let &ListNode.Cons(b4, r4) = r3;
+    let &ListNode.Cons(b5, r5) = r4;
+    let &ListNode.Cons(b6, r6) = r5;
+    let &ListNode.Cons(b7, _) = r6;
+    u8_from_field_unsafe(
+      b0 + 2 * b1 + 4 * b2 + 8 * b3 + 16 * b4 + 32 * b5 + 64 * b6 + 128 * b7)
+  }
+
+  fn lane_from_bits(bs: List‹G›) -> Lane {
+    [byte_from_bits(bs, 0), byte_from_bits(bs, 8), byte_from_bits(bs, 16), byte_from_bits(bs, 24),
+     byte_from_bits(bs, 32), byte_from_bits(bs, 40), byte_from_bits(bs, 48), byte_from_bits(bs, 56)]
+  }
+
+  -- Build the rotated bit list: output position p (0..63) takes source
+  -- bit (p - n) mod 64 = mod64(p + 64 - n).
+  fn build_rot(bs: List‹G›, n: G, p: G) -> List‹G› {
+    match 64 - p {
+      0 => store(ListNode.Nil),
+      _ =>
+        let src = mod64(p + 64 - n);
+        let &ListNode.Cons(head, _) = list_drop(bs, src);
+        store(ListNode.Cons(head, build_rot(bs, n, p + 1))),
+    }
+  }
+
+  fn rotl(l: Lane, n: G) -> Lane {
+    lane_from_bits(build_rot(lane_bits(l), n, 0))
+  }
+
+  -- ==========================================================================
+  -- keccak-f[1600]: 24 rounds of θ ρ π χ ι.
+  -- ==========================================================================
+
+  fn keccak_round(s: [Lane; 25], rc: Lane) -> [Lane; 25] {
+    -- θ: column parities and the D correction.
+    let c0 = xor8(xor8(xor8(xor8(s[0], s[5]), s[10]), s[15]), s[20]);
+    let c1 = xor8(xor8(xor8(xor8(s[1], s[6]), s[11]), s[16]), s[21]);
+    let c2 = xor8(xor8(xor8(xor8(s[2], s[7]), s[12]), s[17]), s[22]);
+    let c3 = xor8(xor8(xor8(xor8(s[3], s[8]), s[13]), s[18]), s[23]);
+    let c4 = xor8(xor8(xor8(xor8(s[4], s[9]), s[14]), s[19]), s[24]);
+    let d0 = xor8(c4, rotl(c1, 1));
+    let d1 = xor8(c0, rotl(c2, 1));
+    let d2 = xor8(c1, rotl(c3, 1));
+    let d3 = xor8(c2, rotl(c4, 1));
+    let d4 = xor8(c3, rotl(c0, 1));
+    let a0 = xor8(s[0], d0);   let a1 = xor8(s[1], d1);   let a2 = xor8(s[2], d2);
+    let a3 = xor8(s[3], d3);   let a4 = xor8(s[4], d4);
+    let a5 = xor8(s[5], d0);   let a6 = xor8(s[6], d1);   let a7 = xor8(s[7], d2);
+    let a8 = xor8(s[8], d3);   let a9 = xor8(s[9], d4);
+    let a10 = xor8(s[10], d0); let a11 = xor8(s[11], d1); let a12 = xor8(s[12], d2);
+    let a13 = xor8(s[13], d3); let a14 = xor8(s[14], d4);
+    let a15 = xor8(s[15], d0); let a16 = xor8(s[16], d1); let a17 = xor8(s[17], d2);
+    let a18 = xor8(s[18], d3); let a19 = xor8(s[19], d4);
+    let a20 = xor8(s[20], d0); let a21 = xor8(s[21], d1); let a22 = xor8(s[22], d2);
+    let a23 = xor8(s[23], d3); let a24 = xor8(s[24], d4);
+    -- ρ + π: B[dest] = rotl(A'[src], offset).
+    let b0  = a0;
+    let b1  = rotl(a6, 44);
+    let b2  = rotl(a12, 43);
+    let b3  = rotl(a18, 21);
+    let b4  = rotl(a24, 14);
+    let b5  = rotl(a3, 28);
+    let b6  = rotl(a9, 20);
+    let b7  = rotl(a10, 3);
+    let b8  = rotl(a16, 45);
+    let b9  = rotl(a22, 61);
+    let b10 = rotl(a1, 1);
+    let b11 = rotl(a7, 6);
+    let b12 = rotl(a13, 25);
+    let b13 = rotl(a19, 8);
+    let b14 = rotl(a20, 18);
+    let b15 = rotl(a4, 27);
+    let b16 = rotl(a5, 36);
+    let b17 = rotl(a11, 10);
+    let b18 = rotl(a17, 15);
+    let b19 = rotl(a23, 56);
+    let b20 = rotl(a2, 62);
+    let b21 = rotl(a8, 55);
+    let b22 = rotl(a14, 39);
+    let b23 = rotl(a15, 41);
+    let b24 = rotl(a21, 2);
+    -- χ: A''[x][y] = B[x][y] ^ (¬B[x+1][y] & B[x+2][y]).
+    let e0  = xor8(b0,  and8(not8(b1),  b2));
+    let e1  = xor8(b1,  and8(not8(b2),  b3));
+    let e2  = xor8(b2,  and8(not8(b3),  b4));
+    let e3  = xor8(b3,  and8(not8(b4),  b0));
+    let e4  = xor8(b4,  and8(not8(b0),  b1));
+    let e5  = xor8(b5,  and8(not8(b6),  b7));
+    let e6  = xor8(b6,  and8(not8(b7),  b8));
+    let e7  = xor8(b7,  and8(not8(b8),  b9));
+    let e8  = xor8(b8,  and8(not8(b9),  b5));
+    let e9  = xor8(b9,  and8(not8(b5),  b6));
+    let e10 = xor8(b10, and8(not8(b11), b12));
+    let e11 = xor8(b11, and8(not8(b12), b13));
+    let e12 = xor8(b12, and8(not8(b13), b14));
+    let e13 = xor8(b13, and8(not8(b14), b10));
+    let e14 = xor8(b14, and8(not8(b10), b11));
+    let e15 = xor8(b15, and8(not8(b16), b17));
+    let e16 = xor8(b16, and8(not8(b17), b18));
+    let e17 = xor8(b17, and8(not8(b18), b19));
+    let e18 = xor8(b18, and8(not8(b19), b15));
+    let e19 = xor8(b19, and8(not8(b15), b16));
+    let e20 = xor8(b20, and8(not8(b21), b22));
+    let e21 = xor8(b21, and8(not8(b22), b23));
+    let e22 = xor8(b22, and8(not8(b23), b24));
+    let e23 = xor8(b23, and8(not8(b24), b20));
+    let e24 = xor8(b24, and8(not8(b20), b21));
+    -- ι: add round constant to lane (0,0).
+    let f0 = xor8(e0, rc);
+    [f0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14,
+     e15, e16, e17, e18, e19, e20, e21, e22, e23, e24]
+  }
+
+  fn keccak_f(s: [Lane; 25]) -> [Lane; 25] {
+    -- 24 round constants as little-endian byte lanes.
+    let rcs = [
+      [0x01u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x82u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x8au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      [0x8bu8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      [0x09u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x8au8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x88u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      [0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      [0x8bu8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x89u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x03u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x02u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x80u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x0au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      [0x80u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      [0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]
+    ];
+    fold(0..24, s, |acc, @i| keccak_round(acc, rcs[@i]))
+  }
+
+  -- ==========================================================================
+  -- Single-block sponge (keccak-256, message ≤ 135 bytes).
+  -- ==========================================================================
+
+  -- Pad bytes for positions i..135 once the message is exhausted. `first`=1 at
+  -- position `len` (gets the 0x01 pad bit); position 135 gets the 0x80 bit
+  -- (combining to 0x81 when len == 135).
+  -- `first` is 1 on the first pad position (gets 0x01), 0 afterwards. The 0x80
+  -- bit lands on position 135 (combining to 0x81 when first==1 there). Uses
+  -- `eq_zero` (an op) instead of a match so it stays tail-position-compilable.
+  fn build_pad(i: G, first: G) -> ByteStream {
+    match 136 - i {
+      0 => store(ListNode.Nil),
+      _ =>
+        store(ListNode.Cons(u8_from_field_unsafe(first + 128 * eq_zero(135 - i)),
+                            build_pad(i + 1, 0))),
+    }
+  }
+
+  -- Take one 136-byte rate block from `stream` starting at position `i`.
+  -- Returns (block, rest, full): a length-136 byte list, the unconsumed stream,
+  -- and full=1 if 136 message bytes were consumed (no padding yet), else 0 (the
+  -- message ended in this block, so pad10*1 was applied). When the stream is
+  -- already empty this yields an all-padding block with full=0 — the extra
+  -- block keccak appends when the message length is a multiple of 136.
+  fn read_block(stream: ByteStream, i: G) -> (ByteStream, ByteStream, G) {
+    match 136 - i {
+      0 => (store(ListNode.Nil), stream, 1),
+      _ => match load(stream) {
+        ListNode.Cons(b, rest) =>
+          let (blk, r, f) = read_block(rest, i + 1);
+          (store(ListNode.Cons(b, blk)), r, f),
+        ListNode.Nil => (build_pad(i, 1), store(ListNode.Nil), 0),
+      },
+    }
+  }
+
+  fn rate_lane(rate: ByteStream, b: G) -> Lane {
+    -- One `list_drop` to the lane's first byte; the next 7 are successive tails.
+    let &ListNode.Cons(r0, t0) = list_drop(rate, b);
+    let &ListNode.Cons(r1, t1) = t0;
+    let &ListNode.Cons(r2, t2) = t1;
+    let &ListNode.Cons(r3, t3) = t2;
+    let &ListNode.Cons(r4, t4) = t3;
+    let &ListNode.Cons(r5, t5) = t4;
+    let &ListNode.Cons(r6, t6) = t5;
+    let &ListNode.Cons(r7, _) = t6;
+    [r0, r1, r2, r3, r4, r5, r6, r7]
+  }
+
+  -- XOR one 136-byte rate block into the first 17 lanes, then permute.
+  fn absorb_one(state: [Lane; 25], rate: ByteStream) -> [Lane; 25] {
+    let s = [
+      xor8(state[0],  rate_lane(rate, 0)),   xor8(state[1],  rate_lane(rate, 8)),
+      xor8(state[2],  rate_lane(rate, 16)),  xor8(state[3],  rate_lane(rate, 24)),
+      xor8(state[4],  rate_lane(rate, 32)),  xor8(state[5],  rate_lane(rate, 40)),
+      xor8(state[6],  rate_lane(rate, 48)),  xor8(state[7],  rate_lane(rate, 56)),
+      xor8(state[8],  rate_lane(rate, 64)),  xor8(state[9],  rate_lane(rate, 72)),
+      xor8(state[10], rate_lane(rate, 80)),  xor8(state[11], rate_lane(rate, 88)),
+      xor8(state[12], rate_lane(rate, 96)),  xor8(state[13], rate_lane(rate, 104)),
+      xor8(state[14], rate_lane(rate, 112)), xor8(state[15], rate_lane(rate, 120)),
+      xor8(state[16], rate_lane(rate, 128)),
+      state[17], state[18], state[19], state[20], state[21], state[22], state[23], state[24]
+    ];
+    keccak_f(s)
+  }
+
+  -- Absorb every rate block of the (padded) message into the state.
+  fn absorb_blocks(stream: ByteStream, state: [Lane; 25]) -> [Lane; 25] {
+    let (block, rest, full) = read_block(stream, 0);
+    let st2 = absorb_one(state, block);
+    match full {
+      0 => st2,
+      _ => absorb_blocks(rest, st2),
+    }
+  }
+
+  -- keccak-256: absorb the byte stream into the zero state, then squeeze the
+  -- first 32 bytes (lanes 0..3) as the digest (4 little-endian byte lanes).
+  fn keccak256(bytes: ByteStream) -> [Lane; 4] {
+    let z = [0u8; 8];
+    let init = [z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z];
+    let s = absorb_blocks(bytes, init);
+    [s[0], s[1], s[2], s[3]]
+  }
+
+  -- ==========================================================================
+  -- Test entrypoint: hash the IO-channel bytes (key [0]) and return 32 bytes.
+  -- ==========================================================================
+
+  pub fn keccak256_test() -> [Lane; 4] {
+    let (idx, len) = io_get_info([0]);
+    let bytes = #read_byte_stream(idx, len);
+    keccak256(bytes)
+  }
+⟧
+
+/-- Standalone keccak-256 toplevel: `core` + `byteStream` + the implementation. -/
+def keccakToplevel : Except Aiur.Global Aiur.Source.Toplevel := do
+  let t ← IxVM.core.merge IxVM.byteStream
+  t.merge keccak
+
+end MultiStark
+
+end
diff --git a/KeccakCheck.lean b/KeccakCheck.lean
new file mode 100644
index 00000000..d624779f
--- /dev/null
+++ b/KeccakCheck.lean
@@ -0,0 +1,38 @@
+import Ix.MultiStark.Keccak
+import Ix.Aiur.Compiler
+import Ix.Aiur.Protocol
+import Ix.Keccak
+
+open Aiur
+
+def hb (x : UInt8) : String := let s := String.ofList (Nat.toDigits 16 x.toNat); if s.length == 1 then "0" ++ s else s
+def toHex (b : ByteArray) : String := b.data.foldl (fun s x => s ++ hb x) ""
+
+def runK (c : CompiledToplevel) (input : ByteArray) : Except String ByteArray := do
+  let idx := c.getFuncIdx `keccak256_test |>.get!
+  let io : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] (input.data.map .ofUInt8)
+  match c.bytecode.execute idx #[] io with
+  | .error e => .error e
+  | .ok (out, _, _) => .ok ⟨out.map (fun g => UInt8.ofNat (g.n % 256))⟩
+
+def main : IO Unit := do
+  let compiled ← match MultiStark.keccakToplevel with
+    | .error g => IO.println s!"merge fail {g}"; return
+    | .ok t => match t.compile with
+      | .error e => IO.println s!"compile fail {e}"; return
+      | .ok c => pure c
+  IO.println "compiled ok"
+  let inputs : List ByteArray :=
+    [ "".toUTF8, "abc".toUTF8, "The quick brown fox jumps over the lazy dog".toUTF8,
+      (ByteArray.mk (Array.range 100 |>.map (fun n => UInt8.ofNat n))),
+      (ByteArray.mk (Array.range 135 |>.map (fun n => UInt8.ofNat n))),
+      (ByteArray.mk (Array.range 136 |>.map (fun n => UInt8.ofNat n))),
+      (ByteArray.mk (Array.range 200 |>.map (fun n => UInt8.ofNat n))),
+      (ByteArray.mk (Array.range 272 |>.map (fun n => UInt8.ofNat (n % 251)))) ]
+  for inp in inputs do
+    let expected := Keccak.hash inp
+    match runK compiled inp with
+    | .error e => IO.println s!"  [{inp.size}B] exec err: {e}"
+    | .ok got =>
+      let tag := if got == expected then "MATCH   " else "MISMATCH"
+      IO.println s!"  [{inp.size}B] {tag}  aiur={toHex got}  ref={toHex expected}"
diff --git a/lakefile.lean b/lakefile.lean
index 853c8191..4af4b7e2 100644
--- a/lakefile.lean
+++ b/lakefile.lean
@@ -102,6 +102,10 @@ lean_exe «recursive-verifier» where
   root := `RecursiveVerifier
   supportInterpreter := true
 
+lean_exe «keccak-check» where
+  root := `KeccakCheck
+  supportInterpreter := true
+
 end Tests
 
 section Benchmarks

From 05a2a7e62284cfa708890ca90fe6e0bcb4c7f5ff Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 18:31:22 -0300
Subject: [PATCH 04/28] recursive verifier keccak assertion

---
 Ix/MultiStark.lean     | 29 ++++++++++++++---------
 RecursiveVerifier.lean | 54 +++++++++++++++++++++++++++++-------------
 2 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index 9526ae4d..84df1763 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -3,16 +3,19 @@ public import Ix.Aiur.Meta
 public import Ix.IxVM.Core
 public import Ix.IxVM.ByteStream
 public import Ix.MultiStark.Deserialize
+public import Ix.MultiStark.Keccak
 
 /-!
 # Multi-STARK proof verifier (Aiur)
 
-Step 1 towards a recursive verifier: an Aiur entrypoint scaffold that
-deserializes a `multi_stark::prover::Proof` (see `Ix/MultiStark/Deserialize.lean`).
+A recursive-verifier scaffold that (a) deserializes a `multi_stark::prover::Proof`
+(`Ix/MultiStark/Deserialize.lean`) and (b) binds the received byte stream to a
+public digest: it recomputes `keccak256` (`Ix/MultiStark/Keccak.lean`, the hash
+multi-stark uses) over the bytes and asserts it equals the digest passed as
+public input.
 
-The deserializer is the wire-format reader; the actual verification logic will
-hang off `read_proof`. For now the entrypoint builds the `Proof` object and
-discards it.
+The remaining verification logic (FRI, Merkle, Fiat-Shamir) will hang off
+`read_proof`.
 -/
 
 public section
@@ -20,24 +23,28 @@ public section
 namespace MultiStark
 
 def entrypoints := ⟦
-  -- Read the proof bytes non-deterministically from IO key `[0]`, deserialize
-  -- into a `Proof` object, and assert the whole stream was consumed. The proof
-  -- is then discarded — actual verification logic will hang off `read_proof`.
-  pub fn verify_multi_stark_proof() {
+  -- Public input: the 32-byte keccak-256 digest of the proof, as 4 little-endian
+  -- byte lanes. Read the proof bytes non-deterministically from IO key `[0]`,
+  -- deserialize into a `Proof` object (asserting the whole stream was consumed),
+  -- then recompute keccak-256 over the same bytes and assert it equals `digest`
+  -- — binding the IO-fed bytes to the public commitment.
+  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4]) {
     let (idx, len) = io_get_info([0]);
     let bytes = #read_byte_stream(idx, len);
     let (_proof, rest) = read_proof(bytes);
     assert_eq!(load(rest), ListNode.Nil);
+    assert_eq!(keccak256(bytes), digest);
     ()
   }
 ⟧
 
 /-- The standalone Multi-STARK verifier toplevel: `core` (lists/options) +
-`byteStream` (`U64`, `flatten_u64`, `read_byte_stream`, …) + the deserializer
-and entrypoint. -/
+`byteStream` (`U64`, `flatten_u64`, `read_byte_stream`, …) + the deserializer,
+the keccak-256 implementation, and the entrypoint. -/
 def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
   let t ← IxVM.core.merge IxVM.byteStream
   let t ← t.merge deserialize
+  let t ← t.merge keccak
   t.merge entrypoints
 
 end MultiStark
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 45d65afc..616aba87 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -3,6 +3,7 @@ import Ix.Aiur.Protocol
 import Ix.Aiur.Compiler
 import Ix.Aiur.Statistics
 import Ix.MultiStark
+import Ix.Keccak
 import Tests.Aiur.Common
 
 /-!
@@ -24,6 +25,11 @@ Run with `lake exe recursive-verifier`.
 
 open Aiur
 
+private def toHex (b : ByteArray) : String :=
+  b.data.foldl (fun s x =>
+    let h := String.ofList (Nat.toDigits 16 x.toNat)
+    s ++ (if h.length == 1 then "0" ++ h else h)) ""
+
 /-- A tiny Aiur program: `factorial` as the proving entrypoint. -/
 def factorialProgram : Source.Toplevel := ⟦
   pub fn factorial(n: G) -> G {
@@ -42,6 +48,14 @@ def buildSystem (label : String) (top : Source.Toplevel) :
     | .ok c => pure c
   pure (compiled, AiurSystem.build compiled.bytecode commitmentParameters)
 
+/-- Minimal FRI parameters for the *inner* proof: keccak-256 over the serialized
+proof runs one keccak-f per 136 bytes, so we keep the proof small (≈ a few KB)
+to make the in-circuit hash tractable to execute. Security of the inner proof is
+irrelevant for this end-to-end test. -/
+def innerFri : FriParameters :=
+  { logFinalPolyLen := 0, maxLogArity := 1, numQueries := 1,
+    commitProofOfWorkBits := 0, queryProofOfWorkBits := 0 }
+
 def main : IO UInt32 := do
   -- ── 1. factorial system ──────────────────────────────────────────────────
   let (facCompiled, facSystem) ← buildSystem "factorial" factorialProgram
@@ -52,19 +66,25 @@ def main : IO UInt32 := do
   -- ── 2. prove factorial(5) = 120 ──────────────────────────────────────────
   -- `G` is a reserved token (the Aiur DSL), so spell the field type qualified.
   let input := #[Aiur.G.ofNat 5]
-  let (claim, proof, _) := facSystem.prove friParameters facIdx input default
+  let (claim, proof, _) := facSystem.prove innerFri facIdx input default
   let expected := buildClaim facIdx input #[Aiur.G.ofNat 120]
   if claim != expected then
     IO.eprintln s!"unexpected factorial claim:\n  got {claim}\n  want {expected}"
     return 1
   IO.println s!"✓ proved factorial(5) = 120  (claim = {claim})"
-  match facSystem.verify friParameters claim (.ofBytes proof.toBytes) with
+  match facSystem.verify innerFri claim (.ofBytes proof.toBytes) with
   | .error e => IO.eprintln s!"inner proof failed to verify: {e}"; return 1
   | .ok _ => IO.println "✓ inner proof verifies"
 
-  -- ── 3. serialize proof, seed it as the IO hint under key [0] ─────────────
+  -- ── 3. serialize proof, compute its keccak-256 digest (the public input) ──
   let proofBytes := proof.toBytes
-  IO.println s!"  serialized proof: {proofBytes.size} bytes"
+  let blocks := (proofBytes.size + 1) / 136 + 1
+  IO.println s!"  serialized proof: {proofBytes.size} bytes  (~{blocks} keccak-f blocks)"
+  let digest := Keccak.hash proofBytes
+  IO.println s!"  keccak256(proof) = {toHex digest}"
+  -- Public input: the 32 digest bytes (grouped by the entrypoint into [[U8;8];4]).
+  let digestInput : Array Aiur.G := digest.data.map .ofUInt8
+  -- IO hint under key [0]: the proof byte stream the verifier reads.
   let proofGs : Array Aiur.G := proofBytes.data.map .ofUInt8
   let verifierIO : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs
 
@@ -72,23 +92,23 @@ def main : IO UInt32 := do
   let vTop ← match MultiStark.multiStark with
     | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1
     | .ok t => pure t
-  let (vCompiled, vSystem) ← buildSystem "verifier" vTop
+  let (vCompiled, _vSystem) ← buildSystem "verifier" vTop
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1
 
-  -- ── 5. run the verifier over the hint, producing a recursive proof ───────
-  let (vClaim, vProof, _) := vSystem.prove friParameters vIdx #[] verifierIO
-  IO.println s!"✓ recursive verifier executed  (claim = {vClaim})"
-  match vSystem.verify friParameters vClaim (.ofBytes vProof.toBytes) with
-  | .error e => IO.eprintln s!"recursive proof failed to verify: {e}"; return 1
-  | .ok _ => IO.println "✓ recursive proof verifies"
-  IO.println s!"  recursive proof: {vProof.toBytes.size} bytes"
-
-  -- ── 6. circuit statistics for the recursive proof ────────────────────────
-  IO.println "\n── recursive proof statistics ──"
-  match vCompiled.bytecode.execute vIdx #[] verifierIO with
-  | .error e => IO.eprintln s!"verifier execution failed: {e}"; return 1
+  -- ── 5. run the verifier: deserialize + recompute keccak256, assert == digest
+  IO.println "running verifier (deserialize + keccak256 over the proof bytes)…"
+  match vCompiled.bytecode.execute vIdx digestInput verifierIO with
+  | .error e => IO.eprintln s!"✗ verifier rejected the proof: {e}"; return 1
   | .ok (_, _, queryCounts) =>
+    IO.println "✓ verifier accepted: deserialized OK and keccak256(bytes) == digest"
+    -- ── 6. negative test: a tampered digest must be rejected ────────────────
+    let badDigest := digestInput.set! 0 (Aiur.G.ofNat ((digest.data[0]!.toNat + 1) % 256))
+    match vCompiled.bytecode.execute vIdx badDigest verifierIO with
+    | .error _ => IO.println "✓ tampered digest correctly rejected (assert_eq failed)"
+    | .ok _ => IO.eprintln "✗ tampered digest was NOT rejected"; return 1
+    -- ── 7. circuit statistics ───────────────────────────────────────────────
+    IO.println "\n── verifier circuit statistics ──"
     Aiur.printStats (Aiur.computeStats vCompiled queryCounts)
   pure 0

From 04934d179fac1d0ec8793de6acd2188e3ae122ae Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 20:12:00 -0300
Subject: [PATCH 05/28] `keccak_f` -> `keccak_f_fold`

reduces width significantly
---
 Ix/MultiStark/Keccak.lean | 70 ++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean
index 352828f1..a085fdf3 100644
--- a/Ix/MultiStark/Keccak.lean
+++ b/Ix/MultiStark/Keccak.lean
@@ -205,35 +205,45 @@ def keccak := ⟦
      e15, e16, e17, e18, e19, e20, e21, e22, e23, e24]
   }
 
-  fn keccak_f(s: [Lane; 25]) -> [Lane; 25] {
-    -- 24 round constants as little-endian byte lanes.
-    let rcs = [
-      [0x01u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x82u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x8au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      [0x8bu8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      [0x09u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x8au8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x88u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      [0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      [0x8bu8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x89u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x03u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x02u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x80u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x0au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      [0x80u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      [0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]
-    ];
-    fold(0..24, s, |acc, @i| keccak_round(acc, rcs[@i]))
+  -- Round constant for round `i` (little-endian byte lane). A match (not an
+  -- array `get`, which needs a literal index) so it can be indexed at runtime.
+  fn rc_lane(i: G) -> Lane {
+    match i {
+      0  => [0x01u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      1  => [0x82u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      2  => [0x8au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      3  => [0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      4  => [0x8bu8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      5  => [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      6  => [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      7  => [0x09u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      8  => [0x8au8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      9  => [0x88u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      10 => [0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      11 => [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      12 => [0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      13 => [0x8bu8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      14 => [0x89u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      15 => [0x03u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      16 => [0x02u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      17 => [0x80u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      18 => [0x0au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
+      19 => [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      20 => [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      21 => [0x80u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
+      22 => [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
+      _  => [0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+    }
+  }
+
+  -- Apply the last `n` keccak-f rounds recursively (round index 24 - n), so
+  -- each round is its own call frame rather than 24 inlined copies (`fold`),
+  -- keeping the circuit narrow. `keccak_f_fold(s, 24)` is the full permutation.
+  fn keccak_f_fold(s: [Lane; 25], n: G) -> [Lane; 25] {
+    match n {
+      0 => s,
+      _ => keccak_f_fold(keccak_round(s, rc_lane(24 - n)), n - 1),
+    }
   }
 
   -- ==========================================================================
@@ -300,7 +310,7 @@ def keccak := ⟦
       xor8(state[16], rate_lane(rate, 128)),
       state[17], state[18], state[19], state[20], state[21], state[22], state[23], state[24]
     ];
-    keccak_f(s)
+    keccak_f_fold(s, 24)
   }
 
   -- Absorb every rate block of the (padded) message into the state.

From e1e63268b499fa657f3b386e02ccdb66f7c8e792 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 20:16:11 -0300
Subject: [PATCH 06/28] `[U8; 8]` -> `&[U8; 8]`

---
 Ix/MultiStark/Keccak.lean | 105 +++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 48 deletions(-)

diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean
index a085fdf3..6caee659 100644
--- a/Ix/MultiStark/Keccak.lean
+++ b/Ix/MultiStark/Keccak.lean
@@ -28,27 +28,34 @@ public section
 namespace MultiStark
 
 def keccak := ⟦
-  -- A 64-bit Keccak lane: 8 little-endian bytes.
-  type Lane = [U8; 8]
+  -- A 64-bit Keccak lane: a pointer to 8 little-endian bytes. Storing lanes
+  -- behind a pointer keeps the state (and every lane-passing function) one
+  -- column wide instead of eight.
+  type Lane = &[U8; 8]
 
   -- ==========================================================================
   -- Lane bit-logic (byte-wise u8 gadgets).
   -- ==========================================================================
 
   fn xor8(a: Lane, b: Lane) -> Lane {
-    [u8_xor(a[0], b[0]), u8_xor(a[1], b[1]), u8_xor(a[2], b[2]), u8_xor(a[3], b[3]),
-     u8_xor(a[4], b[4]), u8_xor(a[5], b[5]), u8_xor(a[6], b[6]), u8_xor(a[7], b[7])]
+    let x = load(a);
+    let y = load(b);
+    store([u8_xor(x[0], y[0]), u8_xor(x[1], y[1]), u8_xor(x[2], y[2]), u8_xor(x[3], y[3]),
+           u8_xor(x[4], y[4]), u8_xor(x[5], y[5]), u8_xor(x[6], y[6]), u8_xor(x[7], y[7])])
   }
 
   fn and8(a: Lane, b: Lane) -> Lane {
-    [u8_and(a[0], b[0]), u8_and(a[1], b[1]), u8_and(a[2], b[2]), u8_and(a[3], b[3]),
-     u8_and(a[4], b[4]), u8_and(a[5], b[5]), u8_and(a[6], b[6]), u8_and(a[7], b[7])]
+    let x = load(a);
+    let y = load(b);
+    store([u8_and(x[0], y[0]), u8_and(x[1], y[1]), u8_and(x[2], y[2]), u8_and(x[3], y[3]),
+           u8_and(x[4], y[4]), u8_and(x[5], y[5]), u8_and(x[6], y[6]), u8_and(x[7], y[7])])
   }
 
   -- Bitwise NOT via XOR with 0xFF (keeps the byte `u8`-typed).
   fn not8(a: Lane) -> Lane {
-    [u8_xor(a[0], 255u8), u8_xor(a[1], 255u8), u8_xor(a[2], 255u8), u8_xor(a[3], 255u8),
-     u8_xor(a[4], 255u8), u8_xor(a[5], 255u8), u8_xor(a[6], 255u8), u8_xor(a[7], 255u8)]
+    let x = load(a);
+    store([u8_xor(x[0], 255u8), u8_xor(x[1], 255u8), u8_xor(x[2], 255u8), u8_xor(x[3], 255u8),
+           u8_xor(x[4], 255u8), u8_xor(x[5], 255u8), u8_xor(x[6], 255u8), u8_xor(x[7], 255u8)])
   }
 
   -- ==========================================================================
@@ -65,14 +72,15 @@ def keccak := ⟦
 
   -- Lane → 64 bits, index z = 8*byte + bit, LSB first.
   fn lane_bits(l: Lane) -> List‹G› {
-    cons8(u8_bit_decomposition(l[0]),
-    cons8(u8_bit_decomposition(l[1]),
-    cons8(u8_bit_decomposition(l[2]),
-    cons8(u8_bit_decomposition(l[3]),
-    cons8(u8_bit_decomposition(l[4]),
-    cons8(u8_bit_decomposition(l[5]),
-    cons8(u8_bit_decomposition(l[6]),
-    cons8(u8_bit_decomposition(l[7]), store(ListNode.Nil)))))))))
+    let v = load(l);
+    cons8(u8_bit_decomposition(v[0]),
+    cons8(u8_bit_decomposition(v[1]),
+    cons8(u8_bit_decomposition(v[2]),
+    cons8(u8_bit_decomposition(v[3]),
+    cons8(u8_bit_decomposition(v[4]),
+    cons8(u8_bit_decomposition(v[5]),
+    cons8(u8_bit_decomposition(v[6]),
+    cons8(u8_bit_decomposition(v[7]), store(ListNode.Nil)))))))))
   }
 
   -- x mod 64 for x in [0, 127]: drop the 64-bit (bit 6).
@@ -101,8 +109,8 @@ def keccak := ⟦
   }
 
   fn lane_from_bits(bs: List‹G›) -> Lane {
-    [byte_from_bits(bs, 0), byte_from_bits(bs, 8), byte_from_bits(bs, 16), byte_from_bits(bs, 24),
-     byte_from_bits(bs, 32), byte_from_bits(bs, 40), byte_from_bits(bs, 48), byte_from_bits(bs, 56)]
+    store([byte_from_bits(bs, 0), byte_from_bits(bs, 8), byte_from_bits(bs, 16), byte_from_bits(bs, 24),
+           byte_from_bits(bs, 32), byte_from_bits(bs, 40), byte_from_bits(bs, 48), byte_from_bits(bs, 56)])
   }
 
   -- Build the rotated bit list: output position p (0..63) takes source
@@ -209,30 +217,30 @@ def keccak := ⟦
   -- array `get`, which needs a literal index) so it can be indexed at runtime.
   fn rc_lane(i: G) -> Lane {
     match i {
-      0  => [0x01u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      1  => [0x82u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      2  => [0x8au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      3  => [0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      4  => [0x8bu8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      5  => [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      6  => [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      7  => [0x09u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      8  => [0x8au8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      9  => [0x88u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      10 => [0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      11 => [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      12 => [0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      13 => [0x8bu8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      14 => [0x89u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      15 => [0x03u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      16 => [0x02u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      17 => [0x80u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      18 => [0x0au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ],
-      19 => [0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      20 => [0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
-      21 => [0x80u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8],
-      22 => [0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ],
-      _  => [0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8],
+      0  => store([0x01u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      1  => store([0x82u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      2  => store([0x8au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      3  => store([0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]),
+      4  => store([0x8bu8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      5  => store([0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ]),
+      6  => store([0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]),
+      7  => store([0x09u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      8  => store([0x8au8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      9  => store([0x88u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      10 => store([0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ]),
+      11 => store([0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ]),
+      12 => store([0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ]),
+      13 => store([0x8bu8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      14 => store([0x89u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      15 => store([0x03u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      16 => store([0x02u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      17 => store([0x80u8, 0u8,    0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      18 => store([0x0au8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0u8   ]),
+      19 => store([0x0au8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]),
+      20 => store([0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]),
+      21 => store([0x80u8, 0x80u8, 0u8, 0u8,    0u8, 0u8, 0u8, 0x80u8]),
+      22 => store([0x01u8, 0u8,    0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8   ]),
+      _  => store([0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]),
     }
   }
 
@@ -293,7 +301,7 @@ def keccak := ⟦
     let &ListNode.Cons(r5, t5) = t4;
     let &ListNode.Cons(r6, t6) = t5;
     let &ListNode.Cons(r7, _) = t6;
-    [r0, r1, r2, r3, r4, r5, r6, r7]
+    store([r0, r1, r2, r3, r4, r5, r6, r7])
   }
 
   -- XOR one 136-byte rate block into the first 17 lanes, then permute.
@@ -324,19 +332,20 @@ def keccak := ⟦
   }
 
   -- keccak-256: absorb the byte stream into the zero state, then squeeze the
-  -- first 32 bytes (lanes 0..3) as the digest (4 little-endian byte lanes).
-  fn keccak256(bytes: ByteStream) -> [Lane; 4] {
-    let z = [0u8; 8];
+  -- first 32 bytes (lanes 0..3). Lanes are `load`ed back to inline bytes so the
+  -- digest is a plain `[[U8; 8]; 4]`.
+  fn keccak256(bytes: ByteStream) -> [[U8; 8]; 4] {
+    let z = store([0u8; 8]);
     let init = [z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z];
     let s = absorb_blocks(bytes, init);
-    [s[0], s[1], s[2], s[3]]
+    [load(s[0]), load(s[1]), load(s[2]), load(s[3])]
   }
 
   -- ==========================================================================
   -- Test entrypoint: hash the IO-channel bytes (key [0]) and return 32 bytes.
   -- ==========================================================================
 
-  pub fn keccak256_test() -> [Lane; 4] {
+  pub fn keccak256_test() -> [[U8; 8]; 4] {
     let (idx, len) = io_get_info([0]);
     let bytes = #read_byte_stream(idx, len);
     keccak256(bytes)

From 1a99925727c4821eb25cc08ce3a626c658144cf7 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 20:22:58 -0300
Subject: [PATCH 07/28] keccak state defined

---
 Ix/MultiStark/Keccak.lean | 27 +++++++++++++++++----------
 KeccakCheck.lean          | 10 ++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean
index 6caee659..ef48a6e8 100644
--- a/Ix/MultiStark/Keccak.lean
+++ b/Ix/MultiStark/Keccak.lean
@@ -33,6 +33,10 @@ def keccak := ⟦
   -- column wide instead of eight.
   type Lane = &[U8; 8]
 
+  -- The keccak-f state: a pointer to the 5×5 = 25 lanes. Passed behind a
+  -- pointer too, so state-threading functions stay one column wide.
+  type State = &[Lane; 25]
+
   -- ==========================================================================
   -- Lane bit-logic (byte-wise u8 gadgets).
   -- ==========================================================================
@@ -133,7 +137,8 @@ def keccak := ⟦
   -- keccak-f[1600]: 24 rounds of θ ρ π χ ι.
   -- ==========================================================================
 
-  fn keccak_round(s: [Lane; 25], rc: Lane) -> [Lane; 25] {
+  fn keccak_round(sp: State, rc: Lane) -> State {
+    let s = load(sp);
     -- θ: column parities and the D correction.
     let c0 = xor8(xor8(xor8(xor8(s[0], s[5]), s[10]), s[15]), s[20]);
     let c1 = xor8(xor8(xor8(xor8(s[1], s[6]), s[11]), s[16]), s[21]);
@@ -209,8 +214,8 @@ def keccak := ⟦
     let e24 = xor8(b24, and8(not8(b20), b21));
     -- ι: add round constant to lane (0,0).
     let f0 = xor8(e0, rc);
-    [f0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14,
-     e15, e16, e17, e18, e19, e20, e21, e22, e23, e24]
+    store([f0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14,
+           e15, e16, e17, e18, e19, e20, e21, e22, e23, e24])
   }
 
   -- Round constant for round `i` (little-endian byte lane). A match (not an
@@ -247,7 +252,7 @@ def keccak := ⟦
   -- Apply the last `n` keccak-f rounds recursively (round index 24 - n), so
   -- each round is its own call frame rather than 24 inlined copies (`fold`),
   -- keeping the circuit narrow. `keccak_f_fold(s, 24)` is the full permutation.
-  fn keccak_f_fold(s: [Lane; 25], n: G) -> [Lane; 25] {
+  fn keccak_f_fold(s: State, n: G) -> State {
     match n {
       0 => s,
       _ => keccak_f_fold(keccak_round(s, rc_lane(24 - n)), n - 1),
@@ -305,8 +310,9 @@ def keccak := ⟦
   }
 
   -- XOR one 136-byte rate block into the first 17 lanes, then permute.
-  fn absorb_one(state: [Lane; 25], rate: ByteStream) -> [Lane; 25] {
-    let s = [
+  fn absorb_one(sp: State, rate: ByteStream) -> State {
+    let state = load(sp);
+    let s = store([
       xor8(state[0],  rate_lane(rate, 0)),   xor8(state[1],  rate_lane(rate, 8)),
       xor8(state[2],  rate_lane(rate, 16)),  xor8(state[3],  rate_lane(rate, 24)),
       xor8(state[4],  rate_lane(rate, 32)),  xor8(state[5],  rate_lane(rate, 40)),
@@ -317,12 +323,12 @@ def keccak := ⟦
       xor8(state[14], rate_lane(rate, 112)), xor8(state[15], rate_lane(rate, 120)),
       xor8(state[16], rate_lane(rate, 128)),
       state[17], state[18], state[19], state[20], state[21], state[22], state[23], state[24]
-    ];
+    ]);
     keccak_f_fold(s, 24)
   }
 
   -- Absorb every rate block of the (padded) message into the state.
-  fn absorb_blocks(stream: ByteStream, state: [Lane; 25]) -> [Lane; 25] {
+  fn absorb_blocks(stream: ByteStream, state: State) -> State {
     let (block, rest, full) = read_block(stream, 0);
     let st2 = absorb_one(state, block);
     match full {
@@ -336,8 +342,9 @@ def keccak := ⟦
   -- digest is a plain `[[U8; 8]; 4]`.
   fn keccak256(bytes: ByteStream) -> [[U8; 8]; 4] {
     let z = store([0u8; 8]);
-    let init = [z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z];
-    let s = absorb_blocks(bytes, init);
+    let init = store([z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z]);
+    let sp = absorb_blocks(bytes, init);
+    let s = load(sp);
     [load(s[0]), load(s[1]), load(s[2]), load(s[3])]
   }
 
diff --git a/KeccakCheck.lean b/KeccakCheck.lean
index d624779f..e10d7adb 100644
--- a/KeccakCheck.lean
+++ b/KeccakCheck.lean
@@ -1,6 +1,7 @@
 import Ix.MultiStark.Keccak
 import Ix.Aiur.Compiler
 import Ix.Aiur.Protocol
+import Ix.Aiur.Statistics
 import Ix.Keccak
 
 open Aiur
@@ -36,3 +37,12 @@ def main : IO Unit := do
     | .ok got =>
       let tag := if got == expected then "MATCH   " else "MISMATCH"
       IO.println s!"  [{inp.size}B] {tag}  aiur={toHex got}  ref={toHex expected}"
+  -- Circuit statistics for one representative hash (272 bytes = 3 blocks).
+  let statInput := ByteArray.mk (Array.range 272 |>.map (fun n => UInt8.ofNat (n % 251)))
+  let idx := compiled.getFuncIdx `keccak256_test |>.get!
+  let io : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] (statInput.data.map .ofUInt8)
+  match compiled.bytecode.execute idx #[] io with
+  | .error e => IO.println s!"stats exec err: {e}"
+  | .ok (_, _, queryCounts) =>
+    IO.println s!"\n── keccak256 circuit statistics ({statInput.size}-byte input, 3 blocks) ──"
+    Aiur.printStats (Aiur.computeStats compiled queryCounts)

From 9d48fe5b27be6c63e63be2e02b4aecad2ce126c9 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 20:44:27 -0300
Subject: [PATCH 08/28] recursive verifier stub

---
 Ix/MultiStark.lean          |   7 ++-
 Ix/MultiStark/Pcs.lean      |  34 +++++++++++
 Ix/MultiStark/Verifier.lean | 116 ++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 Ix/MultiStark/Pcs.lean
 create mode 100644 Ix/MultiStark/Verifier.lean

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index 84df1763..3c45891b 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -4,6 +4,8 @@ public import Ix.IxVM.Core
 public import Ix.IxVM.ByteStream
 public import Ix.MultiStark.Deserialize
 public import Ix.MultiStark.Keccak
+public import Ix.MultiStark.Pcs
+public import Ix.MultiStark.Verifier
 
 /-!
 # Multi-STARK proof verifier (Aiur)
@@ -31,9 +33,10 @@ def entrypoints := ⟦
   pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4]) {
     let (idx, len) = io_get_info([0]);
     let bytes = #read_byte_stream(idx, len);
-    let (_proof, rest) = read_proof(bytes);
+    let (proof, rest) = read_proof(bytes);
     assert_eq!(load(rest), ListNode.Nil);
     assert_eq!(keccak256(bytes), digest);
+    assert_eq!(verify(proof), 1);
     ()
   }
 ⟧
@@ -45,6 +48,8 @@ def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
   let t ← IxVM.core.merge IxVM.byteStream
   let t ← t.merge deserialize
   let t ← t.merge keccak
+  let t ← t.merge pcs
+  let t ← t.merge verifier
   t.merge entrypoints
 
 end MultiStark
diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
new file mode 100644
index 00000000..c32c6e29
--- /dev/null
+++ b/Ix/MultiStark/Pcs.lean
@@ -0,0 +1,34 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.MultiStark.Deserialize
+
+/-!
+# PCS verification stub
+
+`multi-stark/src/verifier.rs` calls `pcs.verify(coms_to_verify, opening_proof,
+&mut challenger)` (a `TwoAdicFriPcs` FRI verification: query openings, Merkle
+authentication paths, FRI folding consistency). That is the heaviest part of
+the verifier and is **stubbed** here — `pcs_verify` accepts any opening proof
+unconditionally.
+
+Replacing this stub with a real FRI verifier (against the keccak Merkle commits
+and the Fiat-Shamir challenger) is the remaining work for a sound recursive
+verifier.
+-/
+
+public section
+
+namespace MultiStark
+
+def pcs := ⟦
+  -- STUB: accept any FRI opening proof. Returns 1 ("accepted"). The argument is
+  -- ignored; a real implementation would check query openings, Merkle paths and
+  -- the FRI folding against the committed roots and sampled challenges.
+  fn pcs_verify(opening: FriProof) -> G {
+    1
+  }
+⟧
+
+end MultiStark
+
+end
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
new file mode 100644
index 00000000..78bdeb22
--- /dev/null
+++ b/Ix/MultiStark/Verifier.lean
@@ -0,0 +1,116 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.IxVM.Core
+public import Ix.IxVM.ByteStream
+public import Ix.MultiStark.Deserialize
+public import Ix.MultiStark.Keccak
+public import Ix.MultiStark.Pcs
+
+/-!
+# Multi-STARK verifier (Aiur)
+
+Reimplementation of `multi-stark/src/verifier.rs` (`System::verify_multiple_claims`)
+over the deserialized `Proof` (`Ix/MultiStark/Deserialize.lean`).
+
+The Rust verifier runs these steps:
+
+1. **Shape check** — proof array dimensions match the system's circuit count and
+   column widths.
+2. **Accumulator balance** — the last intermediate accumulator is zero (all
+   lookup pushes/pulls cancel).
+3. **Fiat-Shamir replay** — reconstruct the Keccak challenger: observe
+   commitments / trace heights / claims, sample (lookup, fingerprint, α, ζ).
+4. **PCS verification** — FRI opening proofs (see `Ix/MultiStark/Pcs.lean`).
+5. **OOD evaluation** — recompute the composition polynomial at ζ and check
+   `composition(ζ) · inv_vanishing(ζ) == quotient(ζ)`.
+
+### Implemented here
+* Step 1 (the system-independent part): the proof is internally consistent —
+  `stage_1`, `stage_2` and `intermediate_accumulators` all have the same length
+  (the circuit count) and it is non-zero.
+* Step 2: accumulator balance — the last `intermediate_accumulator` is the zero
+  extension element.
+* Step 4: the PCS check, via the accept-stub `pcs_verify`.
+
+### Stubbed / TODO
+* Steps 3 and 5 need extension-field (`GoldilocksExt2`) arithmetic, evaluation
+  domains (vanishing polynomials, subgroup generators), the Keccak Fiat-Shamir
+  challenger (`SerializingChallenger64<HashChallenger<u8, Keccak256Hash, 32>>`,
+  which our `keccak256` can drive), and the per-circuit AIR constraint folder.
+  None of that infrastructure exists in Aiur yet, so these steps are left as the
+  next milestone. With the PCS stubbed and steps 3/5 absent, this verifier is a
+  **structural** verifier, not yet sound.
+-/
+
+public section
+
+namespace MultiStark
+
+def verifier := ⟦
+  -- Interpret a raw little-endian `u64` limb as a Goldilocks field element. The
+  -- field add/mul reduce mod p, so a non-canonical zero (the on-wire repr can be
+  -- `p = 0xFFFFFFFF00000001`, not `0`) maps to the field zero.
+  fn u64_to_field(b: U64) -> G {
+    to_field(b[0])
+      + 0x100 * to_field(b[1])
+      + 0x10000 * to_field(b[2])
+      + 0x1000000 * to_field(b[3])
+      + 0x100000000 * to_field(b[4])
+      + 0x10000000000 * to_field(b[5])
+      + 0x1000000000000 * to_field(b[6])
+      + 0x100000000000000 * to_field(b[7])
+  }
+
+  -- An on-wire extension element (`ExtVal = GoldilocksExt2`) is two raw `u64`
+  -- limbs. It is zero iff both limbs reduce to the field zero (mod p).
+  fn ext_is_zero(e: Ext) -> G {
+    eq_zero(u64_to_field(e[0])) * eq_zero(u64_to_field(e[1]))
+  }
+
+  -- 1 iff the LAST element of the accumulator list is the zero extension
+  -- element (Rust: `intermediate_accumulators.last() == Some(ExtVal::ZERO)`).
+  -- The empty list returns 0 (there is no last element to balance).
+  fn last_acc_is_zero(accs: List‹Ext›) -> G {
+    match load(accs) {
+      ListNode.Nil => 0,
+      ListNode.Cons(e, rest) =>
+        match load(rest) {
+          ListNode.Nil => ext_is_zero(e),
+          _ => last_acc_is_zero(rest),
+        },
+    }
+  }
+
+  -- Structural + accumulator verification of a deserialized proof.
+  --
+  -- Returns 1 on success; `assert_eq!` aborts the (proof) execution on any
+  -- failed check, exactly as the Rust verifier returns `Err`.
+  fn verify(proof: Proof) -> G {
+    match proof {
+      Proof.Mk(_commitments, accs, _log_degrees, opening,
+               _quotient, _preprocessed, stage_1, stage_2) =>
+        -- Step 1 (shape, system-independent): the per-round opened-value lists
+        -- and the accumulator list all have the same length = the circuit count.
+        let num_circuits = list_length(accs);
+        -- there must be at least one circuit (Rust: InvalidSystem)
+        assert_eq!(eq_zero(num_circuits), 0);
+        assert_eq!(list_length(stage_1), num_circuits);
+        assert_eq!(list_length(stage_2), num_circuits);
+
+        -- Step 2: accumulator balance — the last accumulator must be zero.
+        assert_eq!(last_acc_is_zero(accs), 1);
+
+        -- Step 4: PCS opening proof (stubbed — accepts; see Pcs.lean).
+        let _pcs = pcs_verify(opening);
+
+        -- Steps 3 (Fiat-Shamir challenger replay over keccak) and 5 (OOD
+        -- composition/quotient check) are TODO — they need ExtVal/domain math,
+        -- the Keccak challenger, and the per-circuit AIR folder.
+        1,
+    }
+  }
+⟧
+
+end MultiStark
+
+end

From a644b60207c4f39f7e3655bf85ee110d07fefd48 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 25 May 2026 20:55:50 -0300
Subject: [PATCH 09/28] `Ext` redefinition

---
 Ix/MultiStark/Deserialize.lean | 35 ++++++++++++++++++++++++++++++----
 Ix/MultiStark/Verifier.lean    | 20 +++----------------
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/Ix/MultiStark/Deserialize.lean b/Ix/MultiStark/Deserialize.lean
index 2108a5c6..5d1dadc6 100644
--- a/Ix/MultiStark/Deserialize.lean
+++ b/Ix/MultiStark/Deserialize.lean
@@ -36,8 +36,10 @@ def deserialize := ⟦
   -- non-canonical `u64`, kept here as the 8 little-endian bytes (`U64`).
   -- ==========================================================================
 
-  -- `ExtVal = BinomialExtensionField<Goldilocks, 2>` -> `[u64; 2]`.
-  type Ext = [U64; 2]
+  -- `ExtVal = BinomialExtensionField<Goldilocks, 2> = 𝔽_p[X]/(X² - 7)`, stored
+  -- as its two Goldilocks coefficients `[c0, c1]` (= `c0 + c1·X`). On the wire
+  -- each is a raw `u64` limb; `read_ext` reduces it into a field element.
+  type Ext = [G; 2]
 
   -- A Merkle digest: `[u64; DIGEST_ELEMS]` with `DIGEST_ELEMS = 4`.
   type Digest = [U64; 4]
@@ -136,11 +138,36 @@ def deserialize := ⟦
     (flatten_u64(val), s)
   }
 
-  -- `ExtVal` -> `[u64; 2]`, no length prefix.
+  -- Interpret a raw little-endian `u64` limb as a Goldilocks field element. The
+  -- field add/mul reduce mod p, so a non-canonical wire repr (e.g. `0` shipped
+  -- as `p = 0xFFFFFFFF00000001`) maps to the canonical field element.
+  fn limb_to_field(b: U64) -> G {
+    to_field(b[0])
+      + 0x100 * to_field(b[1])
+      + 0x10000 * to_field(b[2])
+      + 0x1000000 * to_field(b[3])
+      + 0x100000000 * to_field(b[4])
+      + 0x10000000000 * to_field(b[5])
+      + 0x1000000000000 * to_field(b[6])
+      + 0x100000000000000 * to_field(b[7])
+  }
+
+  -- `ExtVal` -> two `u64` limbs (no length prefix), reduced to field coefficients.
   fn read_ext(stream: ByteStream) -> (Ext, ByteStream) {
     let (a, s0) = read_u64(stream);
     let (b, s1) = read_u64(s0);
-    ([a, b], s1)
+    ([limb_to_field(a), limb_to_field(b)], s1)
+  }
+
+  -- Extension-field addition: componentwise.
+  fn ext_add(a: Ext, b: Ext) -> Ext {
+    [a[0] + b[0], a[1] + b[1]]
+  }
+
+  -- Extension-field multiplication in 𝔽_p[X]/(X² - 7):
+  --   (a0 + a1·X)(b0 + b1·X) = (a0·b0 + 7·a1·b1) + (a0·b1 + a1·b0)·X.
+  fn ext_mul(a: Ext, b: Ext) -> Ext {
+    [a[0] * b[0] + 7 * (a[1] * b[1]), a[0] * b[1] + a[1] * b[0]]
   }
 
   -- Merkle digest -> `[u64; 4]`, no length prefix.
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index 78bdeb22..cd343f87 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -47,24 +47,10 @@ public section
 namespace MultiStark
 
 def verifier := ⟦
-  -- Interpret a raw little-endian `u64` limb as a Goldilocks field element. The
-  -- field add/mul reduce mod p, so a non-canonical zero (the on-wire repr can be
-  -- `p = 0xFFFFFFFF00000001`, not `0`) maps to the field zero.
-  fn u64_to_field(b: U64) -> G {
-    to_field(b[0])
-      + 0x100 * to_field(b[1])
-      + 0x10000 * to_field(b[2])
-      + 0x1000000 * to_field(b[3])
-      + 0x100000000 * to_field(b[4])
-      + 0x10000000000 * to_field(b[5])
-      + 0x1000000000000 * to_field(b[6])
-      + 0x100000000000000 * to_field(b[7])
-  }
-
-  -- An on-wire extension element (`ExtVal = GoldilocksExt2`) is two raw `u64`
-  -- limbs. It is zero iff both limbs reduce to the field zero (mod p).
+  -- An extension element `[c0, c1]` (`= c0 + c1·X`) is zero iff both Goldilocks
+  -- coefficients are zero. (`read_ext` already reduced the limbs mod p.)
   fn ext_is_zero(e: Ext) -> G {
-    eq_zero(u64_to_field(e[0])) * eq_zero(u64_to_field(e[1]))
+    eq_zero(e[0]) * eq_zero(e[1])
   }
 
   -- 1 iff the LAST element of the accumulator list is the zero extension

From fd05c59af14fe8af817660583c69eae480421437 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 26 May 2026 10:13:15 -0300
Subject: [PATCH 10/28] Fiat-Shamir

---
 Ix/MultiStark/Verifier.lean | 132 ++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 5 deletions(-)

diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index cd343f87..1505f122 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -67,13 +67,131 @@ def verifier := ⟦
     }
   }
 
-  -- Structural + accumulator verification of a deserialized proof.
+  -- ==========================================================================
+  -- Fiat-Shamir challenger: `SerializingChallenger64<Val, HashChallenger<u8,
+  -- Keccak256Hash, 32>>`. The inner byte challenger keeps an `input` buffer; a
+  -- `sample` with empty `output` flushes (`input := output := keccak256(input)`)
+  -- and pops bytes from the END of the hash output. The outer layer serializes
+  -- field elements as 8 little-endian bytes and samples field elements as
+  -- 8-byte little-endian u64s.
+  --
+  -- The challenger is threaded as a pair `(input, output)` of byte lists, where
+  -- `output` is held in pop order (front = next byte = hash byte 31, 30, …).
+  -- ==========================================================================
+
+  -- Cons 8 bytes (LSB-first) of `b` onto `tail` (one byte list segment).
+  fn b8_onto(b: [U8; 8], tail: ByteStream) -> ByteStream {
+    store(ListNode.Cons(b[0], store(ListNode.Cons(b[1], store(ListNode.Cons(b[2],
+    store(ListNode.Cons(b[3], store(ListNode.Cons(b[4], store(ListNode.Cons(b[5],
+    store(ListNode.Cons(b[6], store(ListNode.Cons(b[7], tail))))))))))))))))
+  }
+
+  -- A digest = `[u64; 4]` = 32 bytes (each limb little-endian) onto `tail`.
+  fn digest_onto(d: Digest, tail: ByteStream) -> ByteStream {
+    b8_onto(d[0], b8_onto(d[1], b8_onto(d[2], b8_onto(d[3], tail))))
+  }
+
+  -- A commitment (`MerkleCap` = `Vec<Digest>`): all digest bytes, onto `tail`.
+  fn cap_onto(cap: MerkleCap, tail: ByteStream) -> ByteStream {
+    match load(cap) {
+      ListNode.Nil => tail,
+      ListNode.Cons(d, rest) => digest_onto(d, cap_onto(rest, tail)),
+    }
+  }
+
+  -- Observe `log_degrees`: each is a `Val::from_u8`, i.e. 8 LE bytes `[ld,0,…]`.
+  fn log_degrees_onto(lds: List‹U8›, tail: ByteStream) -> ByteStream {
+    match load(lds) {
+      ListNode.Nil => tail,
+      ListNode.Cons(ld, rest) =>
+        b8_onto([ld, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], log_degrees_onto(rest, tail)),
+    }
+  }
+
+  -- Reverse `l` onto `acc` (used to put a hash output into pop order).
+  fn rev_onto(l: ByteStream, acc: ByteStream) -> ByteStream {
+    match load(l) {
+      ListNode.Nil => acc,
+      ListNode.Cons(b, rest) => rev_onto(rest, store(ListNode.Cons(b, acc))),
+    }
+  }
+
+  -- Sample one byte. If `output` is empty, flush: hash the `input` buffer, set
+  -- the new input to the 32 hash bytes, and refill `output` (in pop order).
+  fn ch_sample_byte(input: ByteStream, output: ByteStream) -> (U8, ByteStream, ByteStream) {
+    match load(output) {
+      ListNode.Cons(b, rest) => (b, input, rest),
+      ListNode.Nil =>
+        let h = keccak256(input);
+        let fwd = digest_onto(h, store(ListNode.Nil));
+        let rev = rev_onto(fwd, store(ListNode.Nil));
+        let &ListNode.Cons(b, rest) = rev;
+        (b, fwd, rest),
+    }
+  }
+
+  -- Sample 8 bytes = `sample_array()` (for one base-field element, LE).
+  fn ch_sample8(input: ByteStream, output: ByteStream) -> ([U8; 8], ByteStream, ByteStream) {
+    let (b0, i0, o0) = ch_sample_byte(input, output);
+    let (b1, i1, o1) = ch_sample_byte(i0, o0);
+    let (b2, i2, o2) = ch_sample_byte(i1, o1);
+    let (b3, i3, o3) = ch_sample_byte(i2, o2);
+    let (b4, i4, o4) = ch_sample_byte(i3, o3);
+    let (b5, i5, o5) = ch_sample_byte(i4, o4);
+    let (b6, i6, o6) = ch_sample_byte(i5, o5);
+    let (b7, i7, o7) = ch_sample_byte(i6, o6);
+    ([b0, b1, b2, b3, b4, b5, b6, b7], i7, o7)
+  }
+
+  -- Sample a degree-2 extension element: two base samples (`from_basis_*`),
+  -- returning their raw 8-byte LE limbs (so they can be re-observed) and the
+  -- threaded challenger. NOTE: the ~2⁻³² rejection band (limb ≥ p) is not
+  -- handled — `limb_to_field` reduces mod p instead. Honest rejection sampling
+  -- is a TODO for bit-exact agreement with the prover.
+  fn ch_sample_ext(input: ByteStream, output: ByteStream) -> ([U8; 8], [U8; 8], ByteStream, ByteStream) {
+    let (c0, i0, o0) = ch_sample8(input, output);
+    let (c1, i1, o1) = ch_sample8(i0, o0);
+    (c0, c1, i1, o1)
+  }
+
+  -- Replay the verifier transcript and derive the four challenges
+  -- `(lookup, fingerprint, alpha, zeta)`. Mirrors `verify_multiple_claims`'s
+  -- challenger sequence. SIMPLIFICATIONS (TODO): the preprocessed commitment
+  -- (from the verifying key) and the claims are not observed yet, and no
+  -- rejection sampling is done — so the values are not yet prover-faithful when
+  -- the system has preprocessed circuits / claims. The structure is exact.
+  fn fiat_shamir(s1: MerkleCap, s2: MerkleCap, q: MerkleCap, lds: List‹U8›)
+      -> (Ext, Ext, Ext, Ext) {
+    -- observe stage_1 commitment, then the trace heights (log_degrees)
+    let input = cap_onto(s1, store(ListNode.Nil));
+    let input = log_degrees_onto(lds, input);
+    -- sample lookup challenge, then observe it back
+    let (l0, l1, input, _ol) = ch_sample_ext(input, store(ListNode.Nil));
+    let input = b8_onto(l0, b8_onto(l1, input));
+    -- sample fingerprint challenge, then observe it back
+    let (f0, f1, input, _of) = ch_sample_ext(input, store(ListNode.Nil));
+    let input = b8_onto(f0, b8_onto(f1, input));
+    -- observe stage_2 commitment
+    let input = cap_onto(s2, input);
+    -- sample constraint challenge α (not observed)
+    let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil));
+    -- observe quotient commitment
+    let input = cap_onto(q, input);
+    -- sample out-of-domain point ζ
+    let (z0, z1, _input, _oz) = ch_sample_ext(input, store(ListNode.Nil));
+    ([limb_to_field(l0), limb_to_field(l1)],
+     [limb_to_field(f0), limb_to_field(f1)],
+     [limb_to_field(a0), limb_to_field(a1)],
+     [limb_to_field(z0), limb_to_field(z1)])
+  }
+
+  -- Structural + accumulator + Fiat-Shamir verification of a deserialized proof.
   --
   -- Returns 1 on success; `assert_eq!` aborts the (proof) execution on any
   -- failed check, exactly as the Rust verifier returns `Err`.
   fn verify(proof: Proof) -> G {
     match proof {
-      Proof.Mk(_commitments, accs, _log_degrees, opening,
+      Proof.Mk(commitments, accs, log_degrees, opening,
                _quotient, _preprocessed, stage_1, stage_2) =>
         -- Step 1 (shape, system-independent): the per-round opened-value lists
         -- and the accumulator list all have the same length = the circuit count.
@@ -86,12 +204,16 @@ def verifier := ⟦
         -- Step 2: accumulator balance — the last accumulator must be zero.
         assert_eq!(last_acc_is_zero(accs), 1);
 
+        -- Step 3: Fiat-Shamir challenger replay → (lookup, fingerprint, α, ζ).
+        -- Derived here; consumed by the OOD check (step 5, still TODO).
+        let Commitments.Mk(s1, s2, q) = commitments;
+        let _challenges = fiat_shamir(s1, s2, q, log_degrees);
+
         -- Step 4: PCS opening proof (stubbed — accepts; see Pcs.lean).
         let _pcs = pcs_verify(opening);
 
-        -- Steps 3 (Fiat-Shamir challenger replay over keccak) and 5 (OOD
-        -- composition/quotient check) are TODO — they need ExtVal/domain math,
-        -- the Keccak challenger, and the per-circuit AIR folder.
+        -- Step 5 (OOD composition/quotient check) is TODO — it needs the
+        -- per-circuit AIR constraint folder and evaluation-domain arithmetic.
         1,
     }
   }

From 25f5bcde56fea6201be0cbabc71baaf83a892e26 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 26 May 2026 12:08:27 -0300
Subject: [PATCH 11/28] OOD check scaffold

---
 Ix/MultiStark/Deserialize.lean |  37 +++++++++++
 Ix/MultiStark/Verifier.lean    | 109 +++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)

diff --git a/Ix/MultiStark/Deserialize.lean b/Ix/MultiStark/Deserialize.lean
index 5d1dadc6..a4f69eeb 100644
--- a/Ix/MultiStark/Deserialize.lean
+++ b/Ix/MultiStark/Deserialize.lean
@@ -170,6 +170,43 @@ def deserialize := ⟦
     [a[0] * b[0] + 7 * (a[1] * b[1]), a[0] * b[1] + a[1] * b[0]]
   }
 
+  fn ext_sub(a: Ext, b: Ext) -> Ext {
+    [a[0] - b[0], a[1] - b[1]]
+  }
+
+  fn ext_neg(a: Ext) -> Ext {
+    [0 - a[0], 0 - a[1]]
+  }
+
+  -- Goldilocks (base-field) inverse via Fermat: x^(p-2). Aiur has no field
+  -- division, so we exponentiate. p = 2^64 - 2^32 + 1, so
+  -- p - 2 = 2^64 - 2^32 - 1 — in binary: bit 32 is 0, every other bit is 1.
+  -- `g_run(acc, base, n)` applies n steps of `acc ← acc²·base` (a run of
+  -- set exponent bits). 31 ones, one zero, 32 ones reconstructs p-2.
+  fn g_run(acc: G, base: G, n: G) -> G {
+    match n {
+      0 => acc,
+      _ => g_run(acc * acc * base, base, n - 1),
+    }
+  }
+  fn g_inverse(x: G) -> G {
+    let acc = g_run(x, x, 30);   -- bits 63..33: 31 ones (initial acc=x is bit 63)
+    let acc = acc * acc;          -- bit 32: a single 0
+    g_run(acc, x, 32)             -- bits 31..0: 32 ones
+  }
+
+  -- Extension inverse in 𝔽_p[X]/(X² - 7). For a = a0 + a1·X the conjugate is
+  -- a0 - a1·X and the norm a·ā = a0² - 7·a1² ∈ 𝔽_p, so a⁻¹ = ā / norm.
+  fn ext_inverse(a: Ext) -> Ext {
+    let norm = a[0] * a[0] - 7 * (a[1] * a[1]);
+    let ninv = g_inverse(norm);
+    [a[0] * ninv, (0 - a[1]) * ninv]
+  }
+
+  fn ext_div(a: Ext, b: Ext) -> Ext {
+    ext_mul(a, ext_inverse(b))
+  }
+
   -- Merkle digest -> `[u64; 4]`, no length prefix.
   fn read_digest(stream: ByteStream) -> (Digest, ByteStream) {
     let (a, s0) = read_u64(stream);
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index 1505f122..fe29274f 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -185,6 +185,104 @@ def verifier := ⟦
      [limb_to_field(z0), limb_to_field(z1)])
   }
 
+  -- ==========================================================================
+  -- OOD evaluation domain math (`TwoAdicMultiplicativeCoset`, Goldilocks).
+  -- The trace domain for a circuit of size 2^L is the order-2^L subgroup H
+  -- (shift = 1) generated by `two_adic_gen(L)`.
+  -- ==========================================================================
+
+  -- x^(2^k): k repeated squarings (base field).
+  fn exp_pow2(x: G, k: G) -> G {
+    match k {
+      0 => x,
+      _ => exp_pow2(x * x, k - 1),
+    }
+  }
+
+  -- e^(2^k) in the extension field.
+  fn ext_exp_pow2(e: Ext, k: G) -> Ext {
+    match k {
+      0 => e,
+      _ => ext_exp_pow2(ext_mul(e, e), k - 1),
+    }
+  }
+
+  -- `two_adic_generator(bits)` — a primitive 2^bits root of unity in Goldilocks
+  -- (`Plonky3/goldilocks/src/goldilocks.rs::TWO_ADIC_GENERATORS`).
+  fn two_adic_gen(bits: G) -> G {
+    match bits {
+      0  => 0x0000000000000001,
+      1  => 0xffffffff00000000,
+      2  => 0x0001000000000000,
+      3  => 0xfffffffeff000001,
+      4  => 0xefffffff00000001,
+      5  => 0x00003fffffffc000,
+      6  => 0x0000008000000000,
+      7  => 0xf80007ff08000001,
+      8  => 0xbf79143ce60ca966,
+      9  => 0x1905d02a5c411f4e,
+      10 => 0x9d8f2ad78bfed972,
+      11 => 0x0653b4801da1c8cf,
+      12 => 0xf2c35199959dfcb6,
+      13 => 0x1544ef2335d17997,
+      14 => 0xe0ee099310bba1e2,
+      15 => 0xf6b2cffe2306baac,
+      16 => 0x54df9630bf79450e,
+      17 => 0xabd0a6e8aa3d8a0e,
+      18 => 0x81281a7b05f9beac,
+      19 => 0xfbd41c6b8caa3302,
+      20 => 0x30ba2ecd5e93e76d,
+      21 => 0xf502aef532322654,
+      22 => 0x4b2a18ade67246b5,
+      23 => 0xea9d5a1336fbc98b,
+      24 => 0x86cdcc31c307e171,
+      25 => 0x4bbaf5976ecfefd8,
+      26 => 0xed41d05b78d6e286,
+      27 => 0x10d78dd8915a171d,
+      28 => 0x59049500004a4485,
+      29 => 0xdfa8c93ba46d2666,
+      30 => 0x7e9bd009b86a0845,
+      31 => 0x400a7f755588e659,
+      _  => 0x185629dcda58878c,
+    }
+  }
+
+  -- Vanishing polynomial of the trace domain (shift = 1, size 2^L) at point ζ:
+  -- `Z_H(ζ) = ζ^(2^L) - 1`.
+  fn trace_vanishing(zeta: Ext, l: G) -> Ext {
+    ext_sub(ext_exp_pow2(zeta, l), [1, 0])
+  }
+
+  -- Lagrange selectors at ζ for the trace domain (shift = 1), mirroring
+  -- `domain.rs::selectors_at_point`:
+  --   is_first_row   = Z_H(ζ) / (ζ - 1)
+  --   is_last_row    = Z_H(ζ) / (ζ - g⁻¹)
+  --   is_transition  = ζ - g⁻¹
+  --   inv_vanishing  = 1 / Z_H(ζ)
+  -- where g = two_adic_gen(L) is the subgroup generator.
+  fn trace_selectors(zeta: Ext, l: G) -> (Ext, Ext, Ext, Ext) {
+    let zh = trace_vanishing(zeta, l);
+    let ginv = g_inverse(two_adic_gen(l));
+    let is_first = ext_div(zh, ext_sub(zeta, [1, 0]));
+    let is_last = ext_div(zh, ext_sub(zeta, [ginv, 0]));
+    let is_trans = ext_sub(zeta, [ginv, 0]);
+    let inv_van = ext_inverse(zh);
+    (is_first, is_last, is_trans, inv_van)
+  }
+
+  -- Self-test for the OOD domain math: the subgroup generator is a primitive
+  -- 2^L-th root of unity (g^(2^L) = 1, g^(2^(L-1)) = -1), and the vanishing
+  -- polynomial is zero at a domain point.
+  pub fn ood_domain_test() -> G {
+    let g = two_adic_gen(3);
+    assert_eq!(exp_pow2(g, 3), 1);
+    assert_eq!(exp_pow2(g, 2), 0 - 1);
+    let v = trace_vanishing([g, 0], 3);
+    assert_eq!(v[0], 0);
+    assert_eq!(v[1], 0);
+    1
+  }
+
   -- Structural + accumulator + Fiat-Shamir verification of a deserialized proof.
   --
   -- Returns 1 on success; `assert_eq!` aborts the (proof) execution on any
@@ -217,6 +315,17 @@ def verifier := ⟦
         1,
     }
   }
+
+  -- Self-test for the extension-field arithmetic the OOD check is built on:
+  -- base inverse (7·7⁻¹ = 1) and extension inverse ((3+5X)·(3+5X)⁻¹ = 1).
+  pub fn ext_arith_test() -> G {
+    assert_eq!(7 * g_inverse(7), 1);
+    let a = [3, 5];
+    let prod = ext_mul(a, ext_inverse(a));
+    assert_eq!(prod[0], 1);
+    assert_eq!(prod[1], 0);
+    1
+  }
 ⟧
 
 end MultiStark

From 354bdbd0b334a7a78501dc7d9531e1846505d022 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 26 May 2026 12:45:33 -0300
Subject: [PATCH 12/28] Verifying key manual codec

---
 Cargo.lock              |   1 -
 Cargo.toml              |   8 +-
 src/aiur.rs             |   1 +
 src/aiur/constraints.rs |   1 +
 src/aiur/memory.rs      |   3 +-
 src/aiur/synthesis.rs   |   5 +-
 src/aiur/vk_codec.rs    | 306 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 317 insertions(+), 8 deletions(-)
 create mode 100644 src/aiur/vk_codec.rs

diff --git a/Cargo.lock b/Cargo.lock
index 7de24578..b1df7dd0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1954,7 +1954,6 @@ dependencies = [
 [[package]]
 name = "multi-stark"
 version = "0.1.0"
-source = "git+https://github.com/argumentcomputer/multi-stark.git?rev=9ecab51d553445c0cc7b571af00a76b8a83a6f8c#9ecab51d553445c0cc7b571af00a76b8a83a6f8c"
 dependencies = [
  "bincode",
  "p3-air",
diff --git a/Cargo.toml b/Cargo.toml
index 58b90be3..8339ae85 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ itertools = "0.14.0"
 indexmap = { version = "2", features = ["rayon"] }
 lean-ffi = { git = "https://github.com/argumentcomputer/lean-ffi", rev = "cc98ebf67bf453ac3827cb767f78b13ea674dd6a" }
 mimalloc = { version = "0.1", default-features = false }
-multi-stark = { git = "https://github.com/argumentcomputer/multi-stark.git", rev = "9ecab51d553445c0cc7b571af00a76b8a83a6f8c" }
+multi-stark = { path = "multi-stark" }
 num-bigint = "0.4.6"
 rayon = "1"
 rustc-hash = "2"
@@ -31,8 +31,8 @@ getrandom = { version = "0.3", optional = true }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "8ce04e3422cd48e68ef47fab95dba7d06b8c368c" }
-bincode = { version = "2.0.1", optional = true }
-serde = { version = "1.0.219", features = ["derive"], optional = true }
+bincode = { version = "2.0.1" }
+serde = { version = "1.0.219", features = ["derive"] }
 
 [dev-dependencies]
 quickcheck = "1.0.3"
@@ -43,7 +43,7 @@ quickcheck_macros = "1.0.0"
 default = []
 parallel = ["multi-stark/parallel"]
 test-ffi = []
-net = ["bytes", "tokio", "iroh", "iroh-base", "n0-error", "getrandom", "bincode", "serde" ]
+net = ["bytes", "tokio", "iroh", "iroh-base", "n0-error", "getrandom" ]
 
 [profile.dev]
 panic = "abort"
diff --git a/src/aiur.rs b/src/aiur.rs
index d54325cc..a218d9b8 100644
--- a/src/aiur.rs
+++ b/src/aiur.rs
@@ -5,6 +5,7 @@ pub mod gadgets;
 pub mod memory;
 pub mod synthesis;
 pub mod trace;
+pub mod vk_codec;
 
 use multi_stark::p3_field::PrimeCharacteristicRing;
 
diff --git a/src/aiur/constraints.rs b/src/aiur/constraints.rs
index abdbef2b..4ad382b0 100644
--- a/src/aiur/constraints.rs
+++ b/src/aiur/constraints.rs
@@ -33,6 +33,7 @@ type Degree = u8;
 static INV_256: LazyLock<G> = LazyLock::new(|| G::from_u64(256).inverse());
 
 /// Holds data for a function circuit.
+#[derive(serde::Serialize, serde::Deserialize)]
 pub struct Constraints {
   pub zeros: Vec<Expr>,
   pub selectors: Range<usize>,
diff --git a/src/aiur/memory.rs b/src/aiur/memory.rs
index fa676394..d6069942 100644
--- a/src/aiur/memory.rs
+++ b/src/aiur/memory.rs
@@ -15,8 +15,9 @@ use rayon::{
 
 use crate::aiur::{G, execute::QueryRecord, memory_channel};
 
+#[derive(serde::Serialize, serde::Deserialize)]
 pub struct Memory {
-  width: usize,
+  pub(crate) width: usize,
 }
 
 impl Memory {
diff --git a/src/aiur/synthesis.rs b/src/aiur/synthesis.rs
index 1dd7f971..bc861635 100644
--- a/src/aiur/synthesis.rs
+++ b/src/aiur/synthesis.rs
@@ -26,10 +26,11 @@ pub struct AiurSystem {
   toplevel: Toplevel,
   // perhaps remove the key from the system in verifier only mode?
   key: ProverKey,
-  system: System<AiurCircuit>,
+  pub(crate) system: System<AiurCircuit>,
 }
 
-enum AiurCircuit {
+#[derive(serde::Serialize, serde::Deserialize)]
+pub(crate) enum AiurCircuit {
   Function(Constraints),
   Memory(Memory),
   Bytes1,
diff --git a/src/aiur/vk_codec.rs b/src/aiur/vk_codec.rs
new file mode 100644
index 00000000..e719c47a
--- /dev/null
+++ b/src/aiur/vk_codec.rs
@@ -0,0 +1,306 @@
+//! (De)serialization of the verifier's key: `System<AiurCircuit>`.
+//!
+//! The verifier needs the system's per-circuit AIR (symbolic constraints +
+//! lookups + widths), the shared preprocessed commitment, the preprocessed
+//! index map, and the commitment parameters — i.e. everything in
+//! [`System<AiurCircuit>`] except the prover-only preprocessed traces (the
+//! large gadget tables in each `LookupAir.preprocessed`, which are
+//! `#[serde(skip)]`-ed and reconstructed/committed separately).
+//!
+//! This uses serde (`#[derive(Serialize, Deserialize)]` on `System`, `Circuit`,
+//! `LookupAir`, `Lookup`, `SymbolicExpression`, `SymbolicVariable`, `Entry`,
+//! `CommitmentParameters`, and the Ix `AiurCircuit`/`Constraints`/`Memory`)
+//! with the **same bincode config the proof uses**
+//! (`standard().with_little_endian().with_fixed_int_encoding()`), so the wire
+//! format matches `Proof`'s and can be re-implemented in Aiur the same way.
+
+// The codec is exercised by tests today and wired to the FFI / Aiur port next.
+#![allow(dead_code)]
+
+use bincode::{
+  config::{Configuration, Fixint, LittleEndian, standard},
+  serde::{decode_from_slice, encode_to_vec},
+};
+use multi_stark::system::System;
+
+use crate::aiur::synthesis::{AiurCircuit, AiurSystem};
+
+/// The bincode configuration shared with `multi_stark::prover::Proof`.
+fn serde_config() -> Configuration<LittleEndian, Fixint> {
+  standard().with_little_endian().with_fixed_int_encoding()
+}
+
+/// Serialize the verifying key `System<AiurCircuit>` (preprocessed traces are
+/// skipped — see the module docs).
+pub(crate) fn to_bytes(system: &System<AiurCircuit>) -> Result<Vec<u8>, String> {
+  encode_to_vec(system, serde_config()).map_err(|e| e.to_string())
+}
+
+/// Deserialize a `System<AiurCircuit>` from [`to_bytes`] output, requiring that
+/// every byte is consumed.
+pub(crate) fn from_bytes(bytes: &[u8]) -> Result<System<AiurCircuit>, String> {
+  let (system, consumed) =
+    decode_from_slice(bytes, serde_config()).map_err(|e| e.to_string())?;
+  if consumed != bytes.len() {
+    return Err(format!("trailing data: consumed {consumed} of {}", bytes.len()));
+  }
+  Ok(system)
+}
+
+/// Convenience: serialize the verifying key of a built [`AiurSystem`].
+pub(crate) fn aiur_system_to_bytes(sys: &AiurSystem) -> Result<Vec<u8>, String> {
+  to_bytes(&sys.system)
+}
+
+#[cfg(test)]
+mod tests {
+  use super::*;
+  use crate::aiur::gadgets::{AiurGadget, bytes1::Bytes1, bytes2::Bytes2};
+  use multi_stark::{lookup::LookupAir, types::CommitmentParameters};
+
+  /// Build a small real `System<AiurCircuit>` from the two byte gadgets and
+  /// check the verifying-key codec round-trips (re-encoding is byte-identical).
+  #[test]
+  fn system_vk_round_trips() {
+    let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 };
+    let (system, _key) = System::new(
+      cp,
+      [
+        LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups()),
+        LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups()),
+      ],
+    );
+    let bytes = to_bytes(&system).expect("encode");
+    let back = from_bytes(&bytes).expect("decode");
+    let reencoded = to_bytes(&back).expect("re-encode");
+    assert_eq!(bytes, reencoded, "verifying-key codec round-trip mismatch");
+  }
+
+  #[test]
+  fn rejects_trailing_bytes() {
+    let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 };
+    let (system, _key) =
+      System::new(cp, [LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups())]);
+    let mut bytes = to_bytes(&system).expect("encode");
+    bytes.push(0);
+    assert!(from_bytes(&bytes).is_err(), "should reject trailing data");
+  }
+}
+
+// ════════════════════════════════════════════════════════════════════════════
+// Manual deserializer — a hand-written reader matching the exact bincode bytes
+// `to_bytes` produces, decoding straight into the real `System<AiurCircuit>`.
+// This is the reference re-implemented in Aiur (the verifier reads its key from
+// this stream); `manual_matches_serde` proves it agrees with serde.
+//
+// bincode `standard().little_endian().fixed_int` layout:
+//   enum tag : u32 (4 bytes LE)       Option : 1 byte (0 = None / 1 = Some)
+//   usize / u64 / G : 8 bytes LE      Vec : u64 length, then elements
+//   struct : fields in declaration order      Range<usize> : start, end (u64)
+//   MerkleCap : Vec<[u64; 4]>         PhantomData : 0 bytes
+// ════════════════════════════════════════════════════════════════════════════
+
+use multi_stark::{
+  builder::symbolic::{Entry, SymbolicExpression, SymbolicVariable},
+  lookup::{Lookup, LookupAir},
+  p3_field::PrimeCharacteristicRing,
+  system::Circuit,
+  types::{Commitment, CommitmentParameters, Val},
+};
+
+use crate::aiur::{constraints::Constraints, memory::Memory};
+
+type Expr = SymbolicExpression<Val>;
+
+struct R<'a> {
+  buf: &'a [u8],
+  pos: usize,
+}
+
+impl<'a> R<'a> {
+  fn take(&mut self, n: usize) -> Result<&'a [u8], String> {
+    let end = self.pos.checked_add(n).ok_or("length overflow")?;
+    if end > self.buf.len() {
+      return Err(format!("eof: need {n} at offset {}", self.pos));
+    }
+    let s = &self.buf[self.pos..end];
+    self.pos = end;
+    Ok(s)
+  }
+  fn u8(&mut self) -> Result<u8, String> {
+    Ok(self.take(1)?[0])
+  }
+  fn u32(&mut self) -> Result<u32, String> {
+    Ok(u32::from_le_bytes(self.take(4)?.try_into().unwrap()))
+  }
+  fn u64(&mut self) -> Result<u64, String> {
+    Ok(u64::from_le_bytes(self.take(8)?.try_into().unwrap()))
+  }
+  fn usize(&mut self) -> Result<usize, String> {
+    Ok(self.u64()? as usize)
+  }
+  fn g(&mut self) -> Result<Val, String> {
+    Ok(Val::from_u64(self.u64()?))
+  }
+  fn entry(&mut self) -> Result<Entry, String> {
+    Ok(match self.u32()? {
+      0 => Entry::Preprocessed { offset: self.usize()? },
+      1 => Entry::Main { offset: self.usize()? },
+      2 => Entry::Stage2 { offset: self.usize()? },
+      3 => Entry::Public,
+      4 => Entry::Stage2Public,
+      5 => Entry::Challenge,
+      t => return Err(format!("bad Entry tag {t}")),
+    })
+  }
+  fn expr(&mut self) -> Result<Expr, String> {
+    Ok(match self.u32()? {
+      0 => SymbolicExpression::Variable(SymbolicVariable::new(
+        self.entry()?,
+        self.usize()?,
+      )),
+      1 => SymbolicExpression::IsFirstRow,
+      2 => SymbolicExpression::IsLastRow,
+      3 => SymbolicExpression::IsTransition,
+      4 => SymbolicExpression::Constant(self.g()?),
+      5 => {
+        let x = Box::new(self.expr()?);
+        let y = Box::new(self.expr()?);
+        SymbolicExpression::Add { x, y, degree_multiple: self.usize()? }
+      },
+      6 => {
+        let x = Box::new(self.expr()?);
+        let y = Box::new(self.expr()?);
+        SymbolicExpression::Sub { x, y, degree_multiple: self.usize()? }
+      },
+      7 => {
+        let x = Box::new(self.expr()?);
+        SymbolicExpression::Neg { x, degree_multiple: self.usize()? }
+      },
+      8 => {
+        let x = Box::new(self.expr()?);
+        let y = Box::new(self.expr()?);
+        SymbolicExpression::Mul { x, y, degree_multiple: self.usize()? }
+      },
+      t => return Err(format!("bad SymbolicExpression tag {t}")),
+    })
+  }
+  fn vec<T>(
+    &mut self,
+    mut f: impl FnMut(&mut Self) -> Result<T, String>,
+  ) -> Result<Vec<T>, String> {
+    let n = self.usize()?;
+    let mut v = Vec::with_capacity(n.min(1 << 16));
+    for _ in 0..n {
+      v.push(f(self)?);
+    }
+    Ok(v)
+  }
+  fn lookup(&mut self) -> Result<Lookup<Expr>, String> {
+    Ok(Lookup { multiplicity: self.expr()?, args: self.vec(Self::expr)? })
+  }
+  fn aircircuit(&mut self) -> Result<AiurCircuit, String> {
+    Ok(match self.u32()? {
+      0 => AiurCircuit::Function(Constraints {
+        zeros: self.vec(Self::expr)?,
+        selectors: self.usize()?..self.usize()?,
+        width: self.usize()?,
+      }),
+      1 => AiurCircuit::Memory(Memory { width: self.usize()? }),
+      2 => AiurCircuit::Bytes1,
+      3 => AiurCircuit::Bytes2,
+      t => return Err(format!("bad AiurCircuit tag {t}")),
+    })
+  }
+  fn circuit(&mut self) -> Result<Circuit<AiurCircuit>, String> {
+    // LookupAir: inner_air, lookups (preprocessed is `#[serde(skip)]` -> None).
+    let air = LookupAir {
+      inner_air: self.aircircuit()?,
+      lookups: self.vec(Self::lookup)?,
+      preprocessed: None,
+    };
+    Ok(Circuit {
+      air,
+      constraint_count: self.usize()?,
+      max_constraint_degree: self.usize()?,
+      preprocessed_height: self.usize()?,
+      preprocessed_width: self.usize()?,
+      stage_1_width: self.usize()?,
+      stage_2_width: self.usize()?,
+    })
+  }
+  fn commitment(&mut self) -> Result<Commitment, String> {
+    let caps =
+      self.vec(|r| Ok([r.u64()?, r.u64()?, r.u64()?, r.u64()?]))?;
+    Ok(Commitment::from(caps))
+  }
+  fn option<T>(
+    &mut self,
+    f: impl FnOnce(&mut Self) -> Result<T, String>,
+  ) -> Result<Option<T>, String> {
+    match self.u8()? {
+      0 => Ok(None),
+      1 => Ok(Some(f(self)?)),
+      t => Err(format!("bad Option tag {t}")),
+    }
+  }
+}
+
+/// Hand-written deserializer of the verifying-key `System<AiurCircuit>`,
+/// matching serde/bincode byte-for-byte (the Aiur port mirrors this).
+pub(crate) fn manual_deserialize(
+  bytes: &[u8],
+) -> Result<System<AiurCircuit>, String> {
+  let mut r = R { buf: bytes, pos: 0 };
+  let commitment_parameters = CommitmentParameters {
+    log_blowup: r.usize()?,
+    cap_height: r.usize()?,
+  };
+  let circuits = r.vec(R::circuit)?;
+  let preprocessed_commit = r.option(R::commitment)?;
+  let preprocessed_indices = r.vec(|r| r.option(R::usize))?;
+  if r.pos != bytes.len() {
+    return Err(format!("trailing data: consumed {} of {}", r.pos, bytes.len()));
+  }
+  Ok(System {
+    commitment_parameters,
+    circuits,
+    preprocessed_commit,
+    preprocessed_indices,
+  })
+}
+
+#[cfg(test)]
+mod manual_tests {
+  use super::*;
+  use crate::aiur::gadgets::{AiurGadget, bytes1::Bytes1, bytes2::Bytes2};
+  use multi_stark::{lookup::LookupAir, types::CommitmentParameters};
+
+  /// The manual deserializer agrees with serde: decoding serde's bytes and
+  /// re-encoding with serde reproduces them byte-for-byte.
+  #[test]
+  fn manual_matches_serde() {
+    let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 };
+    let (system, _key) = System::new(
+      cp,
+      [
+        LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups()),
+        LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups()),
+      ],
+    );
+    let bytes = to_bytes(&system).expect("serde encode");
+    let manual = manual_deserialize(&bytes).expect("manual decode");
+    let reencoded = to_bytes(&manual).expect("serde re-encode");
+    assert_eq!(bytes, reencoded, "manual deserializer disagrees with serde");
+  }
+
+  #[test]
+  fn manual_rejects_trailing() {
+    let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 };
+    let (system, _key) =
+      System::new(cp, [LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups())]);
+    let mut bytes = to_bytes(&system).expect("encode");
+    bytes.push(0);
+    assert!(manual_deserialize(&bytes).is_err());
+  }
+}

From c7409a6e0b4b574b093921ae510b5618e7049a6d Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Thu, 28 May 2026 18:54:33 -0300
Subject: [PATCH 13/28] System deserialize in Aiur

---
 Ix/Aiur/Protocol.lean                |   4 +
 Ix/MultiStark.lean                   |  14 +-
 Ix/MultiStark/SystemDeserialize.lean | 285 +++++++++++++++++++++++++++
 RecursiveVerifier.lean               |  33 ++--
 src/ffi/aiur/protocol.rs             |  13 ++
 5 files changed, 337 insertions(+), 12 deletions(-)
 create mode 100644 Ix/MultiStark/SystemDeserialize.lean

diff --git a/Ix/Aiur/Protocol.lean b/Ix/Aiur/Protocol.lean
index df320811..7bfdd5df 100644
--- a/Ix/Aiur/Protocol.lean
+++ b/Ix/Aiur/Protocol.lean
@@ -47,6 +47,10 @@ namespace AiurSystem
 @[extern "rs_aiur_system_build"]
 opaque build : @&Bytecode.Toplevel → @&CommitmentParameters → AiurSystem
 
+/-- Serialize the verifying key (`System<AiurCircuit>`) to bytes. -/
+@[extern "rs_aiur_system_vk_bytes"]
+opaque vkBytes : @& AiurSystem → ByteArray
+
 @[extern "rs_aiur_system_prove"]
 private opaque prove' : @& AiurSystem → @& FriParameters →
   @& Bytecode.FunIdx → @& Array G →
diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index 3c45891b..a37c19a5 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -5,6 +5,7 @@ public import Ix.IxVM.ByteStream
 public import Ix.MultiStark.Deserialize
 public import Ix.MultiStark.Keccak
 public import Ix.MultiStark.Pcs
+public import Ix.MultiStark.SystemDeserialize
 public import Ix.MultiStark.Verifier
 
 /-!
@@ -30,12 +31,22 @@ def entrypoints := ⟦
   -- deserialize into a `Proof` object (asserting the whole stream was consumed),
   -- then recompute keccak-256 over the same bytes and assert it equals `digest`
   -- — binding the IO-fed bytes to the public commitment.
-  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4]) {
+  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4]) {
+    -- Proof from IO key [0]: deserialize, assert fully consumed, and bind the
+    -- bytes to the public keccak-256 `digest`.
     let (idx, len) = io_get_info([0]);
     let bytes = #read_byte_stream(idx, len);
     let (proof, rest) = read_proof(bytes);
     assert_eq!(load(rest), ListNode.Nil);
     assert_eq!(keccak256(bytes), digest);
+    -- Verifying key (`System<AiurCircuit>`) from IO key [1]: bind the bytes to
+    -- the public keccak-256 `system_digest`, then reconstruct the system.
+    let (sidx, slen) = io_get_info([1]);
+    let sbytes = #read_byte_stream(sidx, slen);
+    assert_eq!(keccak256(sbytes), system_digest);
+    let (_sys, srest) = read_system(sbytes);
+    assert_eq!(load(srest), ListNode.Nil);
+    -- Structural + accumulator + Fiat-Shamir checks (OOD will use `_sys`).
     assert_eq!(verify(proof), 1);
     ()
   }
@@ -48,6 +59,7 @@ def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
   let t ← IxVM.core.merge IxVM.byteStream
   let t ← t.merge deserialize
   let t ← t.merge keccak
+  let t ← t.merge systemDeserialize
   let t ← t.merge pcs
   let t ← t.merge verifier
   t.merge entrypoints
diff --git a/Ix/MultiStark/SystemDeserialize.lean b/Ix/MultiStark/SystemDeserialize.lean
new file mode 100644
index 00000000..6c37ce04
--- /dev/null
+++ b/Ix/MultiStark/SystemDeserialize.lean
@@ -0,0 +1,285 @@
+module
+public import Ix.Aiur.Meta
+public import Ix.IxVM.Core
+public import Ix.IxVM.ByteStream
+public import Ix.MultiStark.Deserialize
+
+/-!
+# Verifying-key deserializer (Aiur)
+
+Aiur port of `src/aiur/vk_codec.rs::manual_deserialize` — reconstructs the
+verifier's `System<AiurCircuit>` from the bytes the prover places on the IO
+channel. Same bincode wire format the Rust side validated against serde:
+
+* enum tag       : `u32`, 4 bytes LE
+* `Option`       : 1 tag byte (`0` = None, `1` = Some)
+* `usize`/`u64`  : 8 bytes LE
+* `Vec<T>`       : `u64` length, then elements
+* struct         : fields in declaration order
+* `Range<usize>` : `start`, `end` (two `u64`)
+* `MerkleCap`    : `Vec<[u64; 4]>`
+* Goldilocks `G` : raw `u64`, 8 bytes LE (reduced mod p on read)
+
+Reuses the proof deserializer's byte primitives (`read_u8`, `read_u64`,
+`read_count`, `read_merkle_cap`, `limb_to_field`, `Digest`, `MerkleCap`).
+-/
+
+public section
+
+namespace MultiStark
+
+def systemDeserialize := ⟦
+  -- ==========================================================================
+  -- Reconstructed `System<AiurCircuit>` as Aiur data.
+  -- ==========================================================================
+
+  -- `SymbolicVariable.entry` (a column reference within the window).
+  enum SysEntry {
+    Preprocessed(G),   -- offset
+    Main(G),
+    Stage2(G),
+    Public,
+    Stage2Public,
+    Challenge
+  }
+
+  -- `SymbolicExpression<G>` — the AIR constraint tree. Children are pointers;
+  -- the trailing `G` on Add/Sub/Neg/Mul is the cached `degree_multiple`.
+  enum SymExpr {
+    Var(SysEntry, G),            -- entry, index
+    IsFirstRow,
+    IsLastRow,
+    IsTransition,
+    Const(G),
+    Add(&SymExpr, &SymExpr, G),
+    Sub(&SymExpr, &SymExpr, G),
+    Neg(&SymExpr, G),
+    Mul(&SymExpr, &SymExpr, G)
+  }
+
+  enum SysLookup { Mk(SymExpr, List‹SymExpr›) }          -- multiplicity, args
+
+  enum SysConstraints { Mk(List‹SymExpr›, G, G, G) }     -- zeros, sel_start, sel_end, width
+
+  enum SysMemory { Mk(G) }                               -- width
+
+  enum SysAir {
+    Function(SysConstraints),
+    Memory(SysMemory),
+    Bytes1,
+    Bytes2
+  }
+
+  enum SysLookupAir { Mk(SysAir, List‹SysLookup›) }      -- inner_air, lookups
+
+  -- air, constraint_count, max_constraint_degree, preprocessed_height,
+  -- preprocessed_width, stage_1_width, stage_2_width.
+  enum SysCircuit { Mk(SysLookupAir, G, G, G, G, G, G) }
+
+  enum SysParams { Mk(G, G) }                            -- log_blowup, cap_height
+
+  -- `Option`s as dedicated non-generic enums (unambiguous constructors).
+  enum OptCommit { NoCommit, SomeCommit(MerkleCap) }
+  enum OptIdx { NoIdx, SomeIdx(G) }
+
+  -- commitment_parameters, circuits, preprocessed_commit, preprocessed_indices.
+  enum Sys { Mk(SysParams, List‹SysCircuit›, OptCommit, List‹OptIdx›) }
+
+  -- ==========================================================================
+  -- Byte primitives specific to the VK format.
+  -- ==========================================================================
+
+  -- A `u32` enum tag: 4 little-endian bytes folded into a field element.
+  fn read_u32(stream: ByteStream) -> (G, ByteStream) {
+    let (b0, s0) = read_u8(stream);
+    let (b1, s1) = read_u8(s0);
+    let (b2, s2) = read_u8(s1);
+    let (b3, s3) = read_u8(s2);
+    (to_field(b0) + 0x100 * to_field(b1) + 0x10000 * to_field(b2)
+       + 0x1000000 * to_field(b3), s3)
+  }
+
+  -- A raw `u64` Goldilocks value, reduced mod p (for `Constant`).
+  fn read_field(stream: ByteStream) -> (G, ByteStream) {
+    let (u, s) = read_u64(stream);
+    (limb_to_field(u), s)
+  }
+
+  -- ==========================================================================
+  -- Recursive readers (mirror `vk_codec` reader-by-reader).
+  -- ==========================================================================
+
+  fn read_sys_entry(stream: ByteStream) -> (SysEntry, ByteStream) {
+    let (tag, s) = read_u32(stream);
+    match tag {
+      0 => let (o, s1) = read_count(s); (SysEntry.Preprocessed(o), s1),
+      1 => let (o, s1) = read_count(s); (SysEntry.Main(o), s1),
+      2 => let (o, s1) = read_count(s); (SysEntry.Stage2(o), s1),
+      3 => (SysEntry.Public, s),
+      4 => (SysEntry.Stage2Public, s),
+      _ => (SysEntry.Challenge, s),
+    }
+  }
+
+  fn read_sym_expr(stream: ByteStream) -> (SymExpr, ByteStream) {
+    let (tag, s) = read_u32(stream);
+    match tag {
+      0 =>
+        let (e, s1) = read_sys_entry(s);
+        let (i, s2) = read_count(s1);
+        (SymExpr.Var(e, i), s2),
+      1 => (SymExpr.IsFirstRow, s),
+      2 => (SymExpr.IsLastRow, s),
+      3 => (SymExpr.IsTransition, s),
+      4 => let (c, s1) = read_field(s); (SymExpr.Const(c), s1),
+      5 =>
+        let (x, s1) = read_sym_expr(s);
+        let (y, s2) = read_sym_expr(s1);
+        let (d, s3) = read_count(s2);
+        (SymExpr.Add(store(x), store(y), d), s3),
+      6 =>
+        let (x, s1) = read_sym_expr(s);
+        let (y, s2) = read_sym_expr(s1);
+        let (d, s3) = read_count(s2);
+        (SymExpr.Sub(store(x), store(y), d), s3),
+      7 =>
+        let (x, s1) = read_sym_expr(s);
+        let (d, s2) = read_count(s1);
+        (SymExpr.Neg(store(x), d), s2),
+      _ =>
+        let (x, s1) = read_sym_expr(s);
+        let (y, s2) = read_sym_expr(s1);
+        let (d, s3) = read_count(s2);
+        (SymExpr.Mul(store(x), store(y), d), s3),
+    }
+  }
+
+  fn read_sym_exprs(stream: ByteStream) -> (List‹SymExpr›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_sym_exprs_n(s, n)
+  }
+  fn read_sym_exprs_n(stream: ByteStream, n: G) -> (List‹SymExpr›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_sym_expr(stream);
+        let (rest, s2) = read_sym_exprs_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_sys_lookup(stream: ByteStream) -> (SysLookup, ByteStream) {
+    let (m, s) = read_sym_expr(stream);
+    let (args, s2) = read_sym_exprs(s);
+    (SysLookup.Mk(m, args), s2)
+  }
+  fn read_sys_lookups(stream: ByteStream) -> (List‹SysLookup›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_sys_lookups_n(s, n)
+  }
+  fn read_sys_lookups_n(stream: ByteStream, n: G) -> (List‹SysLookup›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_sys_lookup(stream);
+        let (rest, s2) = read_sys_lookups_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_sys_constraints(stream: ByteStream) -> (SysConstraints, ByteStream) {
+    let (zeros, s) = read_sym_exprs(stream);
+    let (sel_start, s1) = read_count(s);
+    let (sel_end, s2) = read_count(s1);
+    let (width, s3) = read_count(s2);
+    (SysConstraints.Mk(zeros, sel_start, sel_end, width), s3)
+  }
+
+  fn read_sys_air(stream: ByteStream) -> (SysAir, ByteStream) {
+    let (tag, s) = read_u32(stream);
+    match tag {
+      0 => let (c, s1) = read_sys_constraints(s); (SysAir.Function(c), s1),
+      1 => let (w, s1) = read_count(s); (SysAir.Memory(SysMemory.Mk(w)), s1),
+      2 => (SysAir.Bytes1, s),
+      _ => (SysAir.Bytes2, s),
+    }
+  }
+
+  fn read_sys_lookupair(stream: ByteStream) -> (SysLookupAir, ByteStream) {
+    let (inner, s) = read_sys_air(stream);
+    let (lookups, s1) = read_sys_lookups(s);
+    (SysLookupAir.Mk(inner, lookups), s1)
+  }
+
+  fn read_sys_circuit(stream: ByteStream) -> (SysCircuit, ByteStream) {
+    let (air, s) = read_sys_lookupair(stream);
+    let (cc, s1) = read_count(s);
+    let (md, s2) = read_count(s1);
+    let (ph, s3) = read_count(s2);
+    let (pw, s4) = read_count(s3);
+    let (w1, s5) = read_count(s4);
+    let (w2, s6) = read_count(s5);
+    (SysCircuit.Mk(air, cc, md, ph, pw, w1, w2), s6)
+  }
+  fn read_sys_circuits(stream: ByteStream) -> (List‹SysCircuit›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_sys_circuits_n(s, n)
+  }
+  fn read_sys_circuits_n(stream: ByteStream, n: G) -> (List‹SysCircuit›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_sys_circuit(stream);
+        let (rest, s2) = read_sys_circuits_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  -- `Option` tag is a single byte (bincode special-cases Option).
+  fn read_opt_commit(stream: ByteStream) -> (OptCommit, ByteStream) {
+    let (tag, s) = read_u8(stream);
+    match tag {
+      0 => (OptCommit.NoCommit, s),
+      _ => let (c, s1) = read_merkle_cap(s); (OptCommit.SomeCommit(c), s1),
+    }
+  }
+  fn read_opt_idx(stream: ByteStream) -> (OptIdx, ByteStream) {
+    let (tag, s) = read_u8(stream);
+    match tag {
+      0 => (OptIdx.NoIdx, s),
+      _ => let (i, s1) = read_count(s); (OptIdx.SomeIdx(i), s1),
+    }
+  }
+  fn read_opt_idx_list(stream: ByteStream) -> (List‹OptIdx›, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_opt_idx_list_n(s, n)
+  }
+  fn read_opt_idx_list_n(stream: ByteStream, n: G) -> (List‹OptIdx›, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (x, s) = read_opt_idx(stream);
+        let (rest, s2) = read_opt_idx_list_n(s, n - 1);
+        (store(ListNode.Cons(x, rest)), s2),
+    }
+  }
+
+  fn read_sys_params(stream: ByteStream) -> (SysParams, ByteStream) {
+    let (log_blowup, s) = read_count(stream);
+    let (cap_height, s1) = read_count(s);
+    (SysParams.Mk(log_blowup, cap_height), s1)
+  }
+
+  -- Full `System<AiurCircuit>`.
+  fn read_system(stream: ByteStream) -> (Sys, ByteStream) {
+    let (params, s) = read_sys_params(stream);
+    let (circuits, s1) = read_sys_circuits(s);
+    let (commit, s2) = read_opt_commit(s1);
+    let (indices, s3) = read_opt_idx_list(s2);
+    (Sys.Mk(params, circuits, commit, indices), s3)
+  }
+⟧
+
+end MultiStark
+
+end
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 616aba87..67f24cf7 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -82,11 +82,21 @@ def main : IO UInt32 := do
   IO.println s!"  serialized proof: {proofBytes.size} bytes  (~{blocks} keccak-f blocks)"
   let digest := Keccak.hash proofBytes
   IO.println s!"  keccak256(proof) = {toHex digest}"
-  -- Public input: the 32 digest bytes (grouped by the entrypoint into [[U8;8];4]).
   let digestInput : Array Aiur.G := digest.data.map .ofUInt8
-  -- IO hint under key [0]: the proof byte stream the verifier reads.
   let proofGs : Array Aiur.G := proofBytes.data.map .ofUInt8
-  let verifierIO : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs
+
+  -- Verifying key (`System<AiurCircuit>`) bytes + its keccak-256 digest.
+  let vkBytes := facSystem.vkBytes
+  IO.println s!"  verifying key: {vkBytes.size} bytes  (~{(vkBytes.size + 1) / 136 + 1} keccak-f blocks)"
+  let sysDigest := Keccak.hash vkBytes
+  IO.println s!"  keccak256(vk)    = {toHex sysDigest}"
+  let sysDigestInput : Array Aiur.G := sysDigest.data.map .ofUInt8
+  let vkGs : Array Aiur.G := vkBytes.data.map .ofUInt8
+
+  -- Public input = proof digest ++ vk digest; IO hints: proof at [0], vk at [1].
+  let input : Array Aiur.G := digestInput ++ sysDigestInput
+  let verifierIO : IOBuffer :=
+    ((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs
 
   -- ── 4. recursive verifier system ─────────────────────────────────────────
   let vTop ← match MultiStark.multiStark with
@@ -97,15 +107,16 @@ def main : IO UInt32 := do
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1
 
-  -- ── 5. run the verifier: deserialize + recompute keccak256, assert == digest
-  IO.println "running verifier (deserialize + keccak256 over the proof bytes)…"
-  match vCompiled.bytecode.execute vIdx digestInput verifierIO with
-  | .error e => IO.eprintln s!"✗ verifier rejected the proof: {e}"; return 1
+  -- ── 5. run the verifier: deserialize proof + vk, recompute keccak digests,
+  --       reconstruct the System<AiurCircuit>, run structural checks ──────────
+  IO.println "running verifier (proof + verifying-key deserialize + keccak binding)…"
+  match vCompiled.bytecode.execute vIdx input verifierIO with
+  | .error e => IO.eprintln s!"✗ verifier rejected: {e}"; return 1
   | .ok (_, _, queryCounts) =>
-    IO.println "✓ verifier accepted: deserialized OK and keccak256(bytes) == digest"
-    -- ── 6. negative test: a tampered digest must be rejected ────────────────
-    let badDigest := digestInput.set! 0 (Aiur.G.ofNat ((digest.data[0]!.toNat + 1) % 256))
-    match vCompiled.bytecode.execute vIdx badDigest verifierIO with
+    IO.println "✓ verifier accepted: proof + vk deserialized, both keccak digests match"
+    -- ── 6. negative test: a tampered proof digest must be rejected ──────────
+    let badInput := input.set! 0 (Aiur.G.ofNat ((digest.data[0]!.toNat + 1) % 256))
+    match vCompiled.bytecode.execute vIdx badInput verifierIO with
     | .error _ => IO.println "✓ tampered digest correctly rejected (assert_eq failed)"
     | .ok _ => IO.eprintln "✗ tampered digest was NOT rejected"; return 1
     -- ── 7. circuit statistics ───────────────────────────────────────────────
diff --git a/src/ffi/aiur/protocol.rs b/src/ffi/aiur/protocol.rs
index ed2ec7b4..a26d7391 100644
--- a/src/ffi/aiur/protocol.rs
+++ b/src/ffi/aiur/protocol.rs
@@ -57,6 +57,19 @@ extern "C" fn rs_aiur_proof_of_bytes(
   LeanExternal::alloc(&AIUR_PROOF_CLASS, proof)
 }
 
+/// `Aiur.AiurSystem.vkBytes : @& AiurSystem → ByteArray`
+///
+/// Serializes the verifying key (`System<AiurCircuit>`) — see
+/// `crate::aiur::vk_codec`.
+#[unsafe(no_mangle)]
+extern "C" fn rs_aiur_system_vk_bytes(
+  system: LeanExternal<AiurSystem, LeanBorrowed<'_>>,
+) -> LeanByteArray<LeanOwned> {
+  let bytes = crate::aiur::vk_codec::aiur_system_to_bytes(system.get())
+    .expect("VK serialization error");
+  LeanByteArray::from_bytes(&bytes)
+}
+
 /// `AiurSystem.build : @&Bytecode.Toplevel → @&CommitmentParameters → AiurSystem`
 #[unsafe(no_mangle)]
 extern "C" fn rs_aiur_system_build(

From aa99a6c16823a9466072670ac11690949124297f Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Fri, 29 May 2026 14:09:24 -0300
Subject: [PATCH 14/28] OOD check

---
 Ix/MultiStark.lean          |   9 +-
 Ix/MultiStark/Verifier.lean | 369 +++++++++++++++++++++++++++++++++++-
 2 files changed, 371 insertions(+), 7 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index a37c19a5..e9b5af3f 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -46,8 +46,15 @@ def entrypoints := ⟦
     assert_eq!(keccak256(sbytes), system_digest);
     let (_sys, srest) = read_system(sbytes);
     assert_eq!(load(srest), ListNode.Nil);
-    -- Structural + accumulator + Fiat-Shamir checks (OOD will use `_sys`).
+    -- Structural + accumulator + Fiat-Shamir checks.
     assert_eq!(verify(proof), 1);
+    -- Step 5 (out-of-domain composition/quotient check) is fully implemented in
+    -- `ood_verify(_sys, proof)` (`Ix/MultiStark/Verifier.lean`). It is NOT asserted
+    -- here yet because it consumes the Fiat-Shamir challenge ζ, and the current
+    -- `fiat_shamir` is not prover-faithful (it does not observe the preprocessed
+    -- commitment or the claims, and does no rejection sampling), so the ζ it
+    -- derives differs from the prover's. Enabling a sound OOD check is gated on
+    -- that Fiat-Shamir milestone; `ood_verify` is ready to wire in once it lands.
     ()
   }
 ⟧
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index fe29274f..dc17289b 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -5,6 +5,7 @@ public import Ix.IxVM.ByteStream
 public import Ix.MultiStark.Deserialize
 public import Ix.MultiStark.Keccak
 public import Ix.MultiStark.Pcs
+public import Ix.MultiStark.SystemDeserialize
 
 /-!
 # Multi-STARK verifier (Aiur)
@@ -30,15 +31,26 @@ The Rust verifier runs these steps:
   (the circuit count) and it is non-zero.
 * Step 2: accumulator balance — the last `intermediate_accumulator` is the zero
   extension element.
+* Step 3: the Fiat-Shamir challenger replay structure (`fiat_shamir`).
 * Step 4: the PCS check, via the accept-stub `pcs_verify`.
+* Step 5: the out-of-domain composition/quotient check (`ood_verify`). For each
+  circuit it recomputes `composition(ζ)` by replaying the AIR constraint folder
+  (`VerifierConstraintFolder` + `LookupAir::eval`) over the deserialized
+  symbolic system and the opened values, recomputes `quotient(ζ)` from the
+  opened quotient chunks (barycentric `zps` weights over the split quotient
+  domains), and asserts `composition(ζ) · inv_vanishing(ζ) == quotient(ζ)`.
 
 ### Stubbed / TODO
-* Steps 3 and 5 need extension-field (`GoldilocksExt2`) arithmetic, evaluation
-  domains (vanishing polynomials, subgroup generators), the Keccak Fiat-Shamir
-  challenger (`SerializingChallenger64<HashChallenger<u8, Keccak256Hash, 32>>`,
-  which our `keccak256` can drive), and the per-circuit AIR constraint folder.
-  None of that infrastructure exists in Aiur yet, so these steps are left as the
-  next milestone. With the PCS stubbed and steps 3/5 absent, this verifier is a
+* `fiat_shamir` is structurally complete but **not prover-faithful**: it does
+  not observe the preprocessed commitment or the public claims, and does no
+  rejection sampling, so the challenges (in particular ζ) it derives diverge
+  from the prover's whenever the system has preprocessed circuits or claims.
+  Because `ood_verify` consumes ζ, it can only pass end-to-end once Fiat-Shamir
+  is made faithful — that is the remaining milestone. `ood_verify` is therefore
+  implemented and unit-tested (`ood_quotient_test`) but not yet asserted in the
+  entrypoint (see `Ix/MultiStark.lean`).
+* The PCS opening proof (`pcs_verify`) is still an accept-stub.
+  With the PCS stubbed and Fiat-Shamir not yet faithful, this verifier is a
   **structural** verifier, not yet sound.
 -/
 
@@ -326,6 +338,351 @@ def verifier := ⟦
     assert_eq!(prod[1], 0);
     1
   }
+
+  -- ==========================================================================
+  -- Step 5: out-of-domain (OOD) evaluation.
+  --
+  -- Mirrors the per-circuit loop in `verifier.rs::verify_multiple_claims`
+  -- (lines 329-434). For each circuit it recomputes the composition polynomial
+  -- `composition(ζ)` from the opened values by replaying the AIR constraint
+  -- folder (`VerifierConstraintFolder` + `LookupAir::eval`), recomputes the
+  -- quotient `quotient(ζ)` from the opened quotient chunks via the barycentric
+  -- weights `zps`, and asserts
+  --   composition(ζ) · inv_vanishing(ζ) == quotient(ζ).
+  --
+  -- The challenges (lookup, fingerprint, α, ζ) come from `fiat_shamir` above.
+  -- As with `fiat_shamir`, claims are not observed; this assumes the no-claims
+  -- case, so the running lookup accumulator starts at the zero extension
+  -- element (`acc` in Rust = ExtVal::ZERO).
+  -- ==========================================================================
+
+  -- One Horner fold step of the constraint folder: `acc := acc·α + x`
+  -- (`VerifierConstraintFolder::assert_zero` / `assert_zero_ext`).
+  fn ood_fold(acc: Ext, alpha: Ext, x: Ext) -> Ext {
+    ext_add(ext_mul(acc, alpha), x)
+  }
+
+  -- Reconstruct an extension element from its two opened base coordinates,
+  -- `from_ext_basis([c0, c1]) = c0 + c1·X` (the ExtVal basis is `[1, X]`).
+  fn from_ext_basis(c0: Ext, c1: Ext) -> Ext {
+    ext_add(c0, ext_mul(c1, [0, 1]))
+  }
+
+  -- A stage-2 / quotient opened row arrives as `stage_2_width·2` extension
+  -- coordinates; fold consecutive pairs back into `stage_2_width` extension
+  -- elements (Rust: `chunks_exact(2).map(from_ext_basis)`).
+  fn reconstruct_ext_row(raw: List‹Ext›) -> List‹Ext› {
+    match load(raw) {
+      ListNode.Nil => store(ListNode.Nil),
+      ListNode.Cons(c0, t1) =>
+        let ListNode.Cons(c1, t2) = load(t1);
+        store(ListNode.Cons(from_ext_basis(c0, c1), reconstruct_ext_row(t2))),
+    }
+  }
+
+  -- Evaluate a symbolic AIR expression at the opened point (Rust:
+  -- `SymbolicExpression::interpret`). Function-circuit `zeros` and lookup
+  -- args/multiplicities only ever reference `Main`/`Preprocessed` entries
+  -- (offset 0) and constants; the other entries never appear here.
+  fn eval_sym(e: SymExpr, main: List‹Ext›, prep: List‹Ext›) -> Ext {
+    match e {
+      SymExpr.Var(entry, idx) =>
+        match entry {
+          SysEntry.Main(_o) => list_lookup(main, idx),
+          SysEntry.Preprocessed(_o) => list_lookup(prep, idx),
+          SysEntry.Stage2(_o) => [0, 0],
+          SysEntry.Public => [0, 0],
+          SysEntry.Stage2Public => [0, 0],
+          SysEntry.Challenge => [0, 0],
+        },
+      SymExpr.IsFirstRow => [0, 0],
+      SymExpr.IsLastRow => [0, 0],
+      SymExpr.IsTransition => [0, 0],
+      SymExpr.Const(c) => [c, 0],
+      SymExpr.Add(x, y, _d) => ext_add(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+      SymExpr.Sub(x, y, _d) => ext_sub(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+      SymExpr.Neg(x, _d) => ext_neg(eval_sym(load(x), main, prep)),
+      SymExpr.Mul(x, y, _d) => ext_mul(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+    }
+  }
+
+  -- `fingerprint(r, args) = Σ argᵢ·rⁱ` (Horner over the reversed argument list,
+  -- `lookup.rs::fingerprint`).
+  fn fingerprint_ext(r: Ext, args: List‹SymExpr›, main: List‹Ext›, prep: List‹Ext›) -> Ext {
+    match load(args) {
+      ListNode.Nil => [0, 0],
+      ListNode.Cons(a, rest) =>
+        ext_add(eval_sym(a, main, prep), ext_mul(r, fingerprint_ext(r, rest, main, prep))),
+    }
+  }
+
+  -- Fold the inner-AIR `zeros` constraints (Function circuit): each is asserted
+  -- zero on the main (stage-1) row, with no preprocessed row.
+  fn fold_zeros(acc: Ext, alpha: Ext, zeros: List‹SymExpr›, main: List‹Ext›) -> Ext {
+    match load(zeros) {
+      ListNode.Nil => acc,
+      ListNode.Cons(z, rest) =>
+        fold_zeros(ood_fold(acc, alpha, eval_sym(z, main, store(ListNode.Nil))), alpha, rest, main),
+    }
+  }
+
+  -- Fold the selector boolean checks (Function circuit): `assert_bool(row[s])`
+  -- = `assert_zero(s·(s-1))` for `s` in the selector range `[idx, idx+count)`.
+  fn fold_sel_bools(acc: Ext, alpha: Ext, main: List‹Ext›, idx: G, count: G) -> Ext {
+    match count {
+      0 => acc,
+      _ =>
+        let x = list_lookup(main, idx);
+        let bc = ext_mul(x, ext_sub(x, [1, 0]));
+        fold_sel_bools(ood_fold(acc, alpha, bc), alpha, main, idx + 1, count - 1),
+    }
+  }
+
+  -- Fold the per-lookup constraints (`LookupAir::eval`): for lookup `k`,
+  -- `assert_one_ext(messageₖ · minvₖ)` = `assert_zero_ext(messageₖ·minvₖ - 1)`,
+  -- where `minvₖ = stage_2_row[1+k]` and
+  -- `messageₖ = lookup_challenge + fingerprint(fingerprint_challenge, argsₖ)`.
+  -- Simultaneously builds `acc_expr = stage_2_row[0] + Σ multiplicityₖ·minvₖ`.
+  -- Returns `(folder_acc, acc_expr)`.
+  fn fold_lookups(acc: Ext, alpha: Ext, lookups: List‹SysLookup›, k: G,
+      main: List‹Ext›, prep: List‹Ext›, s2row: List‹Ext›,
+      lch: Ext, fch: Ext, acc_expr: Ext) -> (Ext, Ext) {
+    match load(lookups) {
+      ListNode.Nil => (acc, acc_expr),
+      ListNode.Cons(lk, rest) =>
+        let SysLookup.Mk(mult_e, args) = lk;
+        let minv = list_lookup(s2row, k + 1);
+        let mult = eval_sym(mult_e, main, prep);
+        let fp = fingerprint_ext(fch, args, main, prep);
+        let message = ext_add(lch, fp);
+        let c = ext_sub(ext_mul(message, minv), [1, 0]);
+        let acc = ood_fold(acc, alpha, c);
+        let acc_expr = ext_add(acc_expr, ext_mul(mult, minv));
+        fold_lookups(acc, alpha, rest, k + 1, main, prep, s2row, lch, fch, acc_expr),
+    }
+  }
+
+  -- The composition polynomial `composition(ζ)` for one circuit: replays the
+  -- inner-AIR constraints (per air kind) followed by the lookup-argument
+  -- constraints, folding each with `α` exactly as `LookupAir::eval` drives the
+  -- verifier folder. `accp`/`naccp` are the current/next lookup accumulators.
+  fn ood_composition(air: SysAir, lookups: List‹SysLookup›,
+      main: List‹Ext›, main_next: List‹Ext›, s2row: List‹Ext›, s2next: List‹Ext›,
+      prep: List‹Ext›, isf: Ext, isl: Ext, ist: Ext,
+      lch: Ext, fch: Ext, accp: Ext, naccp: Ext, alpha: Ext) -> Ext {
+    -- inner-AIR constraints first, then hand the accumulator to the lookup tail
+    -- (split so each `match air` arm ends in a tail call — Aiur forbids
+    -- non-tail matches).
+    match air {
+      SysAir.Function(c) =>
+        let SysConstraints.Mk(zeros, ss, se, _w) = c;
+        let acc = fold_zeros([0, 0], alpha, zeros, main);
+        let acc = fold_sel_bools(acc, alpha, main, ss, se - ss);
+        ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+      SysAir.Memory(m) =>
+        let SysMemory.Mk(_w) = m;
+        -- `Memory::eval`: is_real = col 1, ptr = col 2 (current and next row).
+        let is_real = list_lookup(main, 1);
+        let ptr = list_lookup(main, 2);
+        let is_real_next = list_lookup(main_next, 1);
+        let ptr_next = list_lookup(main_next, 2);
+        -- assert_bool(is_real)
+        let acc = ood_fold([0, 0], alpha, ext_mul(is_real, ext_sub(is_real, [1, 0])));
+        -- is_real_transition = is_real_next · is_transition
+        let irt = ext_mul(is_real_next, ist);
+        -- when(irt).assert_one(is_real) = irt·(is_real - 1)
+        let acc = ood_fold(acc, alpha, ext_mul(irt, ext_sub(is_real, [1, 0])));
+        -- when(irt).assert_eq(ptr+1, ptr_next) = irt·(ptr + 1 - ptr_next)
+        let acc = ood_fold(acc, alpha, ext_mul(irt, ext_sub(ext_add(ptr, [1, 0]), ptr_next)));
+        ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+      SysAir.Bytes1 =>
+        ood_comp_tail([0, 0], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+      SysAir.Bytes2 =>
+        ood_comp_tail([0, 0], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+    }
+  }
+
+  -- Tail of `ood_composition`: fold the lookup-argument constraints onto the
+  -- inner-AIR accumulator, then the three accumulator boundary constraints.
+  fn ood_comp_tail(acc: Ext, lookups: List‹SysLookup›, main: List‹Ext›, prep: List‹Ext›,
+      s2row: List‹Ext›, s2next: List‹Ext›, isf: Ext, isl: Ext, ist: Ext,
+      lch: Ext, fch: Ext, accp: Ext, naccp: Ext, alpha: Ext) -> Ext {
+    let (acc, acc_expr) =
+      fold_lookups(acc, alpha, lookups, 0, main, prep, s2row, lch, fch, list_lookup(s2row, 0));
+    let acc_col = list_lookup(s2row, 0);
+    let next_acc_col = list_lookup(s2next, 0);
+    -- when_first_row: acc_col = accp
+    let acc = ood_fold(acc, alpha, ext_mul(isf, ext_sub(acc_col, accp)));
+    -- when_transition: acc_expr = next_acc_col
+    let acc = ood_fold(acc, alpha, ext_mul(ist, ext_sub(acc_expr, next_acc_col)));
+    -- when_last_row: acc_expr = naccp
+    ood_fold(acc, alpha, ext_mul(isl, ext_sub(acc_expr, naccp)))
+  }
+
+  -- ==========================================================================
+  -- Quotient evaluation from the opened quotient chunks.
+  --
+  -- The trace domain is the order-2^L subgroup H (shift = 1). The quotient
+  -- domain is the disjoint coset `7·H'` of size `2^(L+log_qd)` (7 = Goldilocks
+  -- GENERATOR), split into `qd = 2^log_qd` chunk domains `Dⱼ` of size `2^L`,
+  -- shift `7·g_q^j` where `g_q = two_adic_gen(L + log_qd)`. Then
+  --   quotient(ζ) = Σⱼ zpsⱼ · from_ext_basis(chunkⱼ),
+  --   zpsⱼ = Πₖ≠ⱼ Z_{Dₖ}(ζ) / Z_{Dₖ}(first_point(Dⱼ)),
+  -- with `Z_{Dₖ}(x) = (x · shift_k⁻¹)^(2^L) - 1`.
+  -- ==========================================================================
+
+  -- base-field power `base^e` (e small: the chunk index, < qd).
+  fn g_pow(base: G, e: G) -> G {
+    match e {
+      0 => 1,
+      _ => base * g_pow(base, e - 1),
+    }
+  }
+
+  -- `Z_{Dⱼ}(x) = (x · shift_j⁻¹)^(2^L) - 1`, evaluated at extension point `x`.
+  fn vanish_chunk(x: Ext, l: G, shiftinv: G) -> Ext {
+    ext_sub(ext_exp_pow2(ext_mul(x, [shiftinv, 0]), l), [1, 0])
+  }
+
+  -- `zpsₜ = Πⱼ≠ₜ Z_{Dⱼ}(ζ) / Z_{Dⱼ}(shift_t)`. Iterates j over `[jidx, jidx+rem)`.
+  fn zps_prod(acc: Ext, zeta: Ext, l: G, g_q: G, shift_t: G, jidx: G, rem: G, t: G) -> Ext {
+    match rem {
+      0 => acc,
+      _ =>
+        let shiftinv = g_inverse(7 * g_pow(g_q, jidx));
+        -- skip the j = t factor (the chunk's own domain); branch in tail
+        -- position so the inner match is not a non-tail match.
+        match eq_zero(jidx - t) {
+          1 => zps_prod(acc, zeta, l, g_q, shift_t, jidx + 1, rem - 1, t),
+          _ =>
+            let factor = ext_mul(vanish_chunk(zeta, l, shiftinv),
+                                 ext_inverse(vanish_chunk([shift_t, 0], l, shiftinv)));
+            zps_prod(ext_mul(acc, factor), zeta, l, g_q, shift_t, jidx + 1, rem - 1, t),
+        },
+    }
+  }
+
+  -- `quotient(ζ) = Σₜ zpsₜ · from_ext_basis(chunkₜ)`, iterating the `qd` chunks
+  -- (`q_opened[idx][0] = [c0, c1]`).
+  fn quotient_sum(acc: Ext, zeta: Ext, l: G, qd: G, g_q: G,
+      q_opened: OpenedRound, idx: G, rem: G, t: G) -> Ext {
+    match rem {
+      0 => acc,
+      _ =>
+        let shift_t = 7 * g_pow(g_q, t);
+        let zps_t = zps_prod([1, 0], zeta, l, g_q, shift_t, 0, qd, t);
+        let ch = list_lookup(q_opened, idx);
+        let row = list_lookup(ch, 0);
+        let qv = from_ext_basis(list_lookup(row, 0), list_lookup(row, 1));
+        quotient_sum(ext_add(acc, ext_mul(zps_t, qv)), zeta, l, qd, g_q,
+                     q_opened, idx + 1, rem - 1, t + 1),
+    }
+  }
+
+  -- `quotient_degree = (max(md, 2) - 1).next_power_of_two()` and its log2.
+  -- Tabulated for `max_constraint_degree ≤ 17` (covers all current circuits);
+  -- larger degrees fall through to the `_` arm.
+  fn quotient_degree_of(md: G) -> G {
+    match md {
+      0 => 1, 1 => 1, 2 => 1,
+      3 => 2,
+      4 => 4, 5 => 4,
+      6 => 8, 7 => 8, 8 => 8, 9 => 8,
+      10 => 16, 11 => 16, 12 => 16, 13 => 16, 14 => 16, 15 => 16, 16 => 16, 17 => 16,
+      _ => 32,
+    }
+  }
+  fn log_qd_of(md: G) -> G {
+    match md {
+      0 => 0, 1 => 0, 2 => 0,
+      3 => 1,
+      4 => 2, 5 => 2,
+      6 => 3, 7 => 3, 8 => 3, 9 => 3,
+      10 => 4, 11 => 4, 12 => 4, 13 => 4, 14 => 4, 15 => 4, 16 => 4, 17 => 4,
+      _ => 5,
+    }
+  }
+
+  -- The preprocessed opened row at ζ for circuit `i`, or `Nil` if the circuit
+  -- has no preprocessed trace (`preprocessed_indices[i] = None`).
+  fn ood_prep_row(prep_opt: PreprocessedOpt, oi: OptIdx) -> List‹Ext› {
+    match oi {
+      OptIdx.NoIdx => store(ListNode.Nil),
+      OptIdx.SomeIdx(j) =>
+        match prep_opt {
+          PreprocessedOpt.NoPreprocessed => store(ListNode.Nil),
+          PreprocessedOpt.SomePreprocessed(round) =>
+            let pr = list_lookup(round, j);
+            list_lookup(pr, 0),
+        },
+    }
+  }
+
+  -- Per-circuit OOD loop: for each circuit, recompute composition(ζ) and
+  -- quotient(ζ) and assert `composition · inv_vanishing == quotient`. Threads
+  -- the running lookup accumulator `accp` and the quotient-chunk offset `lastq`.
+  fn ood_loop(circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›,
+      log_degrees: List‹U8›, accs: List‹Ext›,
+      stage1: OpenedRound, stage2: OpenedRound, prep_opt: PreprocessedOpt,
+      q_opened: OpenedRound, i: G, accp: Ext, lastq: G,
+      lch: Ext, fch: Ext, alpha: Ext, zeta: Ext) -> G {
+    match load(circuits) {
+      ListNode.Nil => 1,
+      ListNode.Cons(circ, rest) =>
+        let SysCircuit.Mk(lair, _cc, md, _ph, _pw, _w1, _w2) = circ;
+        let SysLookupAir.Mk(air, lookups) = lair;
+        let l = to_field(list_lookup(log_degrees, i));
+        let qd = quotient_degree_of(md);
+        let log_qd = log_qd_of(md);
+        let naccp = list_lookup(accs, i);
+        let s1 = list_lookup(stage1, i);
+        let main = list_lookup(s1, 0);
+        let main_next = list_lookup(s1, 1);
+        let s2 = list_lookup(stage2, i);
+        let s2row = reconstruct_ext_row(list_lookup(s2, 0));
+        let s2next = reconstruct_ext_row(list_lookup(s2, 1));
+        let prep = ood_prep_row(prep_opt, list_lookup(prep_indices, i));
+        let (isf, isl, ist, invv) = trace_selectors(zeta, l);
+        let comp = ood_composition(air, lookups, main, main_next, s2row, s2next,
+                                   prep, isf, isl, ist, lch, fch, accp, naccp, alpha);
+        let g_q = two_adic_gen(l + log_qd);
+        let quot = quotient_sum([0, 0], zeta, l, qd, g_q, q_opened, lastq, qd, 0);
+        assert_eq!(ext_mul(comp, invv), quot);
+        ood_loop(rest, prep_indices, log_degrees, accs, stage1, stage2, prep_opt,
+                 q_opened, i + 1, naccp, lastq + qd, lch, fch, alpha, zeta),
+    }
+  }
+
+  -- Step 5 entrypoint: derive the challenges via Fiat-Shamir, then run the OOD
+  -- composition/quotient check for every circuit. Returns 1 on success (any
+  -- mismatch aborts via `assert_eq!`).
+  fn ood_verify(sys: Sys, proof: Proof) -> G {
+    let Sys.Mk(_params, circuits, _commit, prep_indices) = sys;
+    match proof {
+      Proof.Mk(commitments, accs, log_degrees, _opening,
+               q_opened, prep_opt, stage1, stage2) =>
+        let Commitments.Mk(s1c, s2c, qc) = commitments;
+        let (lch, fch, alpha, zeta) = fiat_shamir(s1c, s2c, qc, log_degrees);
+        ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
+                 prep_opt, q_opened, 0, [0, 0], 0, lch, fch, alpha, zeta),
+    }
+  }
+
+  -- Self-test for the quotient-domain math: the chunk vanishing polynomial is
+  -- zero at the chunk's own first point, and `from_ext_basis` reassembles a
+  -- known extension element.
+  pub fn ood_quotient_test() -> G {
+    -- L = 1, log_qd = 1 → g_q = two_adic_gen(2); chunk 0 has shift 7.
+    let sinv0 = g_inverse(7);
+    let z = vanish_chunk([7, 0], 1, sinv0);
+    assert_eq!(z[0], 0);
+    assert_eq!(z[1], 0);
+    -- from_ext_basis([2,3], [5,7]) = [2,3] + [5,7]·X = [2 + 7·7, 3 + 5] = [51, 8].
+    let fb = from_ext_basis([2, 3], [5, 7]);
+    assert_eq!(fb[0], 51);
+    assert_eq!(fb[1], 8);
+    1
+  }
 ⟧
 
 end MultiStark

From 9bea69227fa8e17556bd48523cc70ac2e1b5c1a0 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Fri, 29 May 2026 15:15:44 -0300
Subject: [PATCH 15/28] Claims

---
 Ix/MultiStark.lean          |  25 +++---
 Ix/MultiStark/Verifier.lean | 175 +++++++++++++++++++++++++++---------
 RecursiveVerifier.lean      |  46 +++++++++-
 3 files changed, 190 insertions(+), 56 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index e9b5af3f..e1e72c4f 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -31,7 +31,7 @@ def entrypoints := ⟦
   -- deserialize into a `Proof` object (asserting the whole stream was consumed),
   -- then recompute keccak-256 over the same bytes and assert it equals `digest`
   -- — binding the IO-fed bytes to the public commitment.
-  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4]) {
+  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4]) {
     -- Proof from IO key [0]: deserialize, assert fully consumed, and bind the
     -- bytes to the public keccak-256 `digest`.
     let (idx, len) = io_get_info([0]);
@@ -44,17 +44,22 @@ def entrypoints := ⟦
     let (sidx, slen) = io_get_info([1]);
     let sbytes = #read_byte_stream(sidx, slen);
     assert_eq!(keccak256(sbytes), system_digest);
-    let (_sys, srest) = read_system(sbytes);
+    let (sys, srest) = read_system(sbytes);
     assert_eq!(load(srest), ListNode.Nil);
-    -- Structural + accumulator + Fiat-Shamir checks.
+    -- Public claims (`&[&[Val]]`) from IO key [2]: bind the bytes to the public
+    -- keccak-256 `claims_digest`, then deserialize. Binding them as a public
+    -- input is what makes the lookup argument sound (a prover cannot choose
+    -- claims adaptively).
+    let (cidx, clen) = io_get_info([2]);
+    let cbytes = #read_byte_stream(cidx, clen);
+    assert_eq!(keccak256(cbytes), claims_digest);
+    let (claims, crest) = read_claims(cbytes);
+    assert_eq!(load(crest), ListNode.Nil);
+    -- Structural + accumulator + PCS checks.
     assert_eq!(verify(proof), 1);
-    -- Step 5 (out-of-domain composition/quotient check) is fully implemented in
-    -- `ood_verify(_sys, proof)` (`Ix/MultiStark/Verifier.lean`). It is NOT asserted
-    -- here yet because it consumes the Fiat-Shamir challenge ζ, and the current
-    -- `fiat_shamir` is not prover-faithful (it does not observe the preprocessed
-    -- commitment or the claims, and does no rejection sampling), so the ζ it
-    -- derives differs from the prover's. Enabling a sound OOD check is gated on
-    -- that Fiat-Shamir milestone; `ood_verify` is ready to wire in once it lands.
+    -- Step 3 + 5: prover-faithful Fiat-Shamir replay and the out-of-domain
+    -- composition/quotient check, `composition(ζ)·inv_vanishing(ζ) == quotient(ζ)`.
+    assert_eq!(ood_verify(sys, proof, claims), 1);
     ()
   }
 ⟧
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index dc17289b..e70a565d 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -31,27 +31,29 @@ The Rust verifier runs these steps:
   (the circuit count) and it is non-zero.
 * Step 2: accumulator balance — the last `intermediate_accumulator` is the zero
   extension element.
-* Step 3: the Fiat-Shamir challenger replay structure (`fiat_shamir`).
-* Step 4: the PCS check, via the accept-stub `pcs_verify`.
+* Step 3: the Fiat-Shamir challenger replay (`fiat_shamir`). Prover-faithful:
+  observes the verifying key's preprocessed commitment, the stage_1 commitment,
+  the trace heights, and the public claims (in that order), then samples and
+  re-observes the lookup/fingerprint challenges, observes stage_2, samples α,
+  observes the quotient commitment, and samples ζ — matching
+  `verify_multiple_claims` byte-for-byte.
 * Step 5: the out-of-domain composition/quotient check (`ood_verify`). For each
   circuit it recomputes `composition(ζ)` by replaying the AIR constraint folder
   (`VerifierConstraintFolder` + `LookupAir::eval`) over the deserialized
   symbolic system and the opened values, recomputes `quotient(ζ)` from the
   opened quotient chunks (barycentric `zps` weights over the split quotient
   domains), and asserts `composition(ζ) · inv_vanishing(ζ) == quotient(ζ)`.
+  Validated end-to-end against a real factorial proof (`RecursiveVerifier.lean`):
+  the verifier accepts the honest proof and rejects a tampered claim.
 
 ### Stubbed / TODO
-* `fiat_shamir` is structurally complete but **not prover-faithful**: it does
-  not observe the preprocessed commitment or the public claims, and does no
-  rejection sampling, so the challenges (in particular ζ) it derives diverge
-  from the prover's whenever the system has preprocessed circuits or claims.
-  Because `ood_verify` consumes ζ, it can only pass end-to-end once Fiat-Shamir
-  is made faithful — that is the remaining milestone. `ood_verify` is therefore
-  implemented and unit-tested (`ood_quotient_test`) but not yet asserted in the
-  entrypoint (see `Ix/MultiStark.lean`).
-* The PCS opening proof (`pcs_verify`) is still an accept-stub.
-  With the PCS stubbed and Fiat-Shamir not yet faithful, this verifier is a
-  **structural** verifier, not yet sound.
+* `fiat_shamir` does no rejection sampling: a sampled 8-byte limb in the band
+  `[p, 2⁶⁴)` (probability ≈ 2⁻³²) is reduced mod p instead of being rejected
+  and resampled. For small proofs no sample lands in the band, but honest
+  rejection sampling is needed for full generality.
+* The PCS opening proof (`pcs_verify`) is still an accept-stub. With the PCS
+  stubbed, this verifier checks every algebraic relation except FRI proximity,
+  so it is not yet fully sound.
 -/
 
 public section
@@ -166,29 +168,75 @@ def verifier := ⟦
     (c0, c1, i1, o1)
   }
 
+  -- Append (observe) 8 little-endian bytes of `b` at the END of the challenger
+  -- input buffer. The transcript is held front-to-back (front = first observed =
+  -- first hashed, matching `keccak256`'s absorption order), so an observation
+  -- appends — `b8_onto` PREPENDS, hence the `list_concat`.
+  fn snoc_b8(input: ByteStream, b: [U8; 8]) -> ByteStream {
+    list_concat(input, b8_onto(b, store(ListNode.Nil)))
+  }
+  -- Append (observe) a commitment (`MerkleCap`) at the end of the buffer.
+  fn snoc_cap(input: ByteStream, cap: MerkleCap) -> ByteStream {
+    list_concat(input, cap_onto(cap, store(ListNode.Nil)))
+  }
+
+  -- Append a claim's values (each `Val` as 8 LE bytes) onto `tail`, in order.
+  fn claim_vals_onto(vals: List‹U64›, tail: ByteStream) -> ByteStream {
+    match load(vals) {
+      ListNode.Nil => tail,
+      ListNode.Cons(v, rest) => b8_onto(v, claim_vals_onto(rest, tail)),
+    }
+  }
+  -- Append every claim's values (`challenger.observe_slice(claim)` per claim).
+  fn claims_onto(claims: List‹List‹U64››, tail: ByteStream) -> ByteStream {
+    match load(claims) {
+      ListNode.Nil => tail,
+      ListNode.Cons(c, rest) => claim_vals_onto(c, claims_onto(rest, tail)),
+    }
+  }
+
+  -- The preprocessed commitment cap from the verifying key, or an empty cap
+  -- (observes nothing) when there is none.
+  fn opt_commit_cap(commit: OptCommit) -> MerkleCap {
+    match commit {
+      OptCommit.NoCommit => store(ListNode.Nil),
+      OptCommit.SomeCommit(c) => c,
+    }
+  }
+
   -- Replay the verifier transcript and derive the four challenges
   -- `(lookup, fingerprint, alpha, zeta)`. Mirrors `verify_multiple_claims`'s
-  -- challenger sequence. SIMPLIFICATIONS (TODO): the preprocessed commitment
-  -- (from the verifying key) and the claims are not observed yet, and no
-  -- rejection sampling is done — so the values are not yet prover-faithful when
-  -- the system has preprocessed circuits / claims. The structure is exact.
-  fn fiat_shamir(s1: MerkleCap, s2: MerkleCap, q: MerkleCap, lds: List‹U8›)
-      -> (Ext, Ext, Ext, Ext) {
-    -- observe stage_1 commitment, then the trace heights (log_degrees)
-    let input = cap_onto(s1, store(ListNode.Nil));
+  -- challenger sequence exactly:
+  --   observe preprocessed_commit (if any) → stage_1 → log_degrees → claims;
+  --   sample lookup, observe it; sample fingerprint, observe it;
+  --   observe stage_2; sample α; observe quotient; sample ζ.
+  -- `observe` clears the challenger's output buffer, and every sample here is
+  -- preceded by an observe, so each `ch_sample_ext` re-flushes from an empty
+  -- output (hence the `store(ListNode.Nil)` output argument each time).
+  -- NOTE: the ~2⁻³² rejection band (a sampled 8-byte limb ≥ p) is still not
+  -- handled (`ch_sample8` reduces mod p); for small proofs no sample lands in
+  -- the band, but honest rejection sampling remains a TODO for full generality.
+  fn fiat_shamir(prep: MerkleCap, s1: MerkleCap, s2: MerkleCap, q: MerkleCap,
+      lds: List‹U8›, claims: List‹List‹U64››) -> (Ext, Ext, Ext, Ext) {
+    -- Initial transcript, front-to-back: prep, stage_1, log_degrees, claims.
+    -- Built inner-to-outer with the prepend helpers so the result is in
+    -- forward (observation) order.
+    let input = claims_onto(claims, store(ListNode.Nil));
     let input = log_degrees_onto(lds, input);
-    -- sample lookup challenge, then observe it back
+    let input = cap_onto(s1, input);
+    let input = cap_onto(prep, input);
+    -- sample lookup challenge, then observe it back (append)
     let (l0, l1, input, _ol) = ch_sample_ext(input, store(ListNode.Nil));
-    let input = b8_onto(l0, b8_onto(l1, input));
+    let input = snoc_b8(snoc_b8(input, l0), l1);
     -- sample fingerprint challenge, then observe it back
     let (f0, f1, input, _of) = ch_sample_ext(input, store(ListNode.Nil));
-    let input = b8_onto(f0, b8_onto(f1, input));
+    let input = snoc_b8(snoc_b8(input, f0), f1);
     -- observe stage_2 commitment
-    let input = cap_onto(s2, input);
+    let input = snoc_cap(input, s2);
     -- sample constraint challenge α (not observed)
     let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil));
     -- observe quotient commitment
-    let input = cap_onto(q, input);
+    let input = snoc_cap(input, q);
     -- sample out-of-domain point ζ
     let (z0, z1, _input, _oz) = ch_sample_ext(input, store(ListNode.Nil));
     ([limb_to_field(l0), limb_to_field(l1)],
@@ -295,13 +343,15 @@ def verifier := ⟦
     1
   }
 
-  -- Structural + accumulator + Fiat-Shamir verification of a deserialized proof.
+  -- Structural + accumulator + PCS checks of a deserialized proof (steps 1, 2,
+  -- 4). Fiat-Shamir (step 3) and the OOD check (step 5) live in `ood_verify`,
+  -- which needs the verifying key and the claims.
   --
   -- Returns 1 on success; `assert_eq!` aborts the (proof) execution on any
   -- failed check, exactly as the Rust verifier returns `Err`.
   fn verify(proof: Proof) -> G {
     match proof {
-      Proof.Mk(commitments, accs, log_degrees, opening,
+      Proof.Mk(_commitments, accs, _log_degrees, opening,
                _quotient, _preprocessed, stage_1, stage_2) =>
         -- Step 1 (shape, system-independent): the per-round opened-value lists
         -- and the accumulator list all have the same length = the circuit count.
@@ -314,16 +364,8 @@ def verifier := ⟦
         -- Step 2: accumulator balance — the last accumulator must be zero.
         assert_eq!(last_acc_is_zero(accs), 1);
 
-        -- Step 3: Fiat-Shamir challenger replay → (lookup, fingerprint, α, ζ).
-        -- Derived here; consumed by the OOD check (step 5, still TODO).
-        let Commitments.Mk(s1, s2, q) = commitments;
-        let _challenges = fiat_shamir(s1, s2, q, log_degrees);
-
         -- Step 4: PCS opening proof (stubbed — accepts; see Pcs.lean).
         let _pcs = pcs_verify(opening);
-
-        -- Step 5 (OOD composition/quotient check) is TODO — it needs the
-        -- per-circuit AIR constraint folder and evaluation-domain arithmetic.
         1,
     }
   }
@@ -653,21 +695,68 @@ def verifier := ⟦
     }
   }
 
-  -- Step 5 entrypoint: derive the challenges via Fiat-Shamir, then run the OOD
-  -- composition/quotient check for every circuit. Returns 1 on success (any
-  -- mismatch aborts via `assert_eq!`).
-  fn ood_verify(sys: Sys, proof: Proof) -> G {
-    let Sys.Mk(_params, circuits, _commit, prep_indices) = sys;
+  -- The fingerprint of one claim's values: `Σ vᵢ · fch^i` (each `vᵢ` lifted from
+  -- its raw u64 limb to an extension element). Mirrors `lookup::fingerprint`.
+  fn fingerprint_vals(fch: Ext, vals: List‹U64›) -> Ext {
+    match load(vals) {
+      ListNode.Nil => [0, 0],
+      ListNode.Cons(v, rest) =>
+        ext_add([limb_to_field(v), 0], ext_mul(fch, fingerprint_vals(fch, rest))),
+    }
+  }
+
+  -- The initial lookup accumulator built from the public claims:
+  -- `acc = Σ_claims 1 / (lookup_challenge + fingerprint(fingerprint_challenge, claim))`
+  -- (Rust `verify_multiple_claims`, lines 227-232). Empty claim list → zero.
+  fn claims_acc(acc: Ext, claims: List‹List‹U64››, lch: Ext, fch: Ext) -> Ext {
+    match load(claims) {
+      ListNode.Nil => acc,
+      ListNode.Cons(c, rest) =>
+        let msg = ext_add(lch, fingerprint_vals(fch, c));
+        claims_acc(ext_add(acc, ext_inverse(msg)), rest, lch, fch),
+    }
+  }
+
+  -- Step 3 + 5: derive the challenges via the (prover-faithful) Fiat-Shamir
+  -- replay over the verifying key's preprocessed commitment + the proof
+  -- commitments + log_degrees + claims, seed the lookup accumulator from the
+  -- claims, then run the OOD composition/quotient check for every circuit.
+  -- Returns 1 on success (any mismatch aborts via `assert_eq!`).
+  fn ood_verify(sys: Sys, proof: Proof, claims: List‹List‹U64››) -> G {
+    let Sys.Mk(_params, circuits, commit, prep_indices) = sys;
     match proof {
       Proof.Mk(commitments, accs, log_degrees, _opening,
                q_opened, prep_opt, stage1, stage2) =>
         let Commitments.Mk(s1c, s2c, qc) = commitments;
-        let (lch, fch, alpha, zeta) = fiat_shamir(s1c, s2c, qc, log_degrees);
+        let prep_cap = opt_commit_cap(commit);
+        let (lch, fch, alpha, zeta) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
+        let acc0 = claims_acc([0, 0], claims, lch, fch);
         ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
-                 prep_opt, q_opened, 0, [0, 0], 0, lch, fch, alpha, zeta),
+                 prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta),
     }
   }
 
+  -- Read the public claims from the verifier's IO channel. Wire format (set by
+  -- the prover-side harness): u64 `num_claims`, then per claim a u64 `num_vals`
+  -- followed by `num_vals` raw `u64` `Val`s (8 LE bytes each, canonical < p).
+  fn read_claims(stream: ByteStream) -> (List‹List‹U64››, ByteStream) {
+    let (n, s) = read_count(stream);
+    read_claims_n(s, n)
+  }
+  fn read_claims_n(stream: ByteStream, n: G) -> (List‹List‹U64››, ByteStream) {
+    match n {
+      0 => (store(ListNode.Nil), stream),
+      _ =>
+        let (c, s) = read_one_claim(stream);
+        let (rest, s2) = read_claims_n(s, n - 1);
+        (store(ListNode.Cons(c, rest)), s2),
+    }
+  }
+  fn read_one_claim(stream: ByteStream) -> (List‹U64›, ByteStream) {
+    let (m, s) = read_count(stream);
+    read_u64_vec_n(s, m)
+  }
+
   -- Self-test for the quotient-domain math: the chunk vanishing polynomial is
   -- zero at the chunk's own first point, and `from_ext_basis` reassembles a
   -- known extension element.
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 67f24cf7..87b2aeb4 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -30,6 +30,21 @@ private def toHex (b : ByteArray) : String :=
     let h := String.ofList (Nat.toDigits 16 x.toNat)
     s ++ (if h.length == 1 then "0" ++ h else h)) ""
 
+/-- 8 little-endian bytes of a `Nat` (taken mod 2^64). -/
+private def u64le (n : Nat) : Array UInt8 :=
+  (Array.range 8).map (fun i => UInt8.ofNat ((n >>> (8 * i)) % 256))
+
+/-- Serialize the public claims for the verifier's IO channel, matching the
+in-circuit `read_claims` wire format: u64 `num_claims`, then per claim a u64
+`num_vals` followed by the `Val`s as canonical 8-byte little-endian `u64`s. -/
+private def serializeClaims (claims : Array (Array Aiur.G)) : ByteArray := Id.run do
+  let mut out : Array UInt8 := u64le claims.size
+  for c in claims do
+    out := out ++ u64le c.size
+    for g in c do
+      out := out ++ u64le g.val.toNat
+  return ⟨out⟩
+
 /-- A tiny Aiur program: `factorial` as the proving entrypoint. -/
 def factorialProgram : Source.Toplevel := ⟦
   pub fn factorial(n: G) -> G {
@@ -93,10 +108,19 @@ def main : IO UInt32 := do
   let sysDigestInput : Array Aiur.G := sysDigest.data.map .ofUInt8
   let vkGs : Array Aiur.G := vkBytes.data.map .ofUInt8
 
-  -- Public input = proof digest ++ vk digest; IO hints: proof at [0], vk at [1].
-  let input : Array Aiur.G := digestInput ++ sysDigestInput
+  -- Public claims (`&[&[Val]]` = the single factorial claim) + keccak digest.
+  let claimBytes := serializeClaims #[claim]
+  IO.println s!"  claims: {claimBytes.size} bytes"
+  let claimsDigest := Keccak.hash claimBytes
+  IO.println s!"  keccak256(claims)= {toHex claimsDigest}"
+  let claimsDigestInput : Array Aiur.G := claimsDigest.data.map .ofUInt8
+  let claimGs : Array Aiur.G := claimBytes.data.map .ofUInt8
+
+  -- Public input = proof digest ++ vk digest ++ claims digest;
+  -- IO hints: proof at [0], vk at [1], claims at [2].
+  let input : Array Aiur.G := digestInput ++ sysDigestInput ++ claimsDigestInput
   let verifierIO : IOBuffer :=
-    ((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs
+    (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
 
   -- ── 4. recursive verifier system ─────────────────────────────────────────
   let vTop ← match MultiStark.multiStark with
@@ -119,6 +143,22 @@ def main : IO UInt32 := do
     match vCompiled.bytecode.execute vIdx badInput verifierIO with
     | .error _ => IO.println "✓ tampered digest correctly rejected (assert_eq failed)"
     | .ok _ => IO.eprintln "✗ tampered digest was NOT rejected"; return 1
+    -- ── 6b. negative test: a tampered CLAIM (with a matching keccak digest)
+    --        must be rejected by the OOD / accumulator check. Changing the
+    --        claim feeds a different value into Fiat-Shamir (→ different ζ) and
+    --        into the lookup accumulator, so the composition/quotient identity
+    --        no longer holds even though the keccak binding passes. ──────────
+    let badClaim : Array Aiur.G := claim.set! (claim.size - 1) (Aiur.G.ofNat 121)
+    let badClaimBytes := serializeClaims #[badClaim]
+    let badClaimsDigest := Keccak.hash badClaimBytes
+    let badClaimInput : Array Aiur.G :=
+      digestInput ++ sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8)
+    let badClaimIO : IOBuffer :=
+      (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend
+        #[Aiur.G.ofNat 2] (badClaimBytes.data.map .ofUInt8)
+    match vCompiled.bytecode.execute vIdx badClaimInput badClaimIO with
+    | .error _ => IO.println "✓ tampered claim correctly rejected (OOD/accumulator mismatch)"
+    | .ok _ => IO.eprintln "✗ tampered claim was NOT rejected"; return 1
     -- ── 7. circuit statistics ───────────────────────────────────────────────
     IO.println "\n── verifier circuit statistics ──"
     Aiur.printStats (Aiur.computeStats vCompiled queryCounts)

From 4a314bef54d1e011e440c6a56175977ede62eba5 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 2 Jun 2026 21:08:14 -0300
Subject: [PATCH 16/28] removed dead code

---
 Ix/MultiStark/Keccak.lean   | 14 -----------
 Ix/MultiStark/Verifier.lean | 48 -------------------------------------
 KeccakCheck.lean            | 48 -------------------------------------
 lakefile.lean               |  4 ----
 4 files changed, 114 deletions(-)
 delete mode 100644 KeccakCheck.lean

diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean
index ef48a6e8..a2eb7e73 100644
--- a/Ix/MultiStark/Keccak.lean
+++ b/Ix/MultiStark/Keccak.lean
@@ -348,22 +348,8 @@ def keccak := ⟦
     [load(s[0]), load(s[1]), load(s[2]), load(s[3])]
   }
 
-  -- ==========================================================================
-  -- Test entrypoint: hash the IO-channel bytes (key [0]) and return 32 bytes.
-  -- ==========================================================================
-
-  pub fn keccak256_test() -> [[U8; 8]; 4] {
-    let (idx, len) = io_get_info([0]);
-    let bytes = #read_byte_stream(idx, len);
-    keccak256(bytes)
-  }
 ⟧
 
-/-- Standalone keccak-256 toplevel: `core` + `byteStream` + the implementation. -/
-def keccakToplevel : Except Aiur.Global Aiur.Source.Toplevel := do
-  let t ← IxVM.core.merge IxVM.byteStream
-  t.merge keccak
-
 end MultiStark
 
 end
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index e70a565d..09c9b555 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -251,14 +251,6 @@ def verifier := ⟦
   -- (shift = 1) generated by `two_adic_gen(L)`.
   -- ==========================================================================
 
-  -- x^(2^k): k repeated squarings (base field).
-  fn exp_pow2(x: G, k: G) -> G {
-    match k {
-      0 => x,
-      _ => exp_pow2(x * x, k - 1),
-    }
-  }
-
   -- e^(2^k) in the extension field.
   fn ext_exp_pow2(e: Ext, k: G) -> Ext {
     match k {
@@ -330,19 +322,6 @@ def verifier := ⟦
     (is_first, is_last, is_trans, inv_van)
   }
 
-  -- Self-test for the OOD domain math: the subgroup generator is a primitive
-  -- 2^L-th root of unity (g^(2^L) = 1, g^(2^(L-1)) = -1), and the vanishing
-  -- polynomial is zero at a domain point.
-  pub fn ood_domain_test() -> G {
-    let g = two_adic_gen(3);
-    assert_eq!(exp_pow2(g, 3), 1);
-    assert_eq!(exp_pow2(g, 2), 0 - 1);
-    let v = trace_vanishing([g, 0], 3);
-    assert_eq!(v[0], 0);
-    assert_eq!(v[1], 0);
-    1
-  }
-
   -- Structural + accumulator + PCS checks of a deserialized proof (steps 1, 2,
   -- 4). Fiat-Shamir (step 3) and the OOD check (step 5) live in `ood_verify`,
   -- which needs the verifying key and the claims.
@@ -370,17 +349,6 @@ def verifier := ⟦
     }
   }
 
-  -- Self-test for the extension-field arithmetic the OOD check is built on:
-  -- base inverse (7·7⁻¹ = 1) and extension inverse ((3+5X)·(3+5X)⁻¹ = 1).
-  pub fn ext_arith_test() -> G {
-    assert_eq!(7 * g_inverse(7), 1);
-    let a = [3, 5];
-    let prod = ext_mul(a, ext_inverse(a));
-    assert_eq!(prod[0], 1);
-    assert_eq!(prod[1], 0);
-    1
-  }
-
   -- ==========================================================================
   -- Step 5: out-of-domain (OOD) evaluation.
   --
@@ -756,22 +724,6 @@ def verifier := ⟦
     let (m, s) = read_count(stream);
     read_u64_vec_n(s, m)
   }
-
-  -- Self-test for the quotient-domain math: the chunk vanishing polynomial is
-  -- zero at the chunk's own first point, and `from_ext_basis` reassembles a
-  -- known extension element.
-  pub fn ood_quotient_test() -> G {
-    -- L = 1, log_qd = 1 → g_q = two_adic_gen(2); chunk 0 has shift 7.
-    let sinv0 = g_inverse(7);
-    let z = vanish_chunk([7, 0], 1, sinv0);
-    assert_eq!(z[0], 0);
-    assert_eq!(z[1], 0);
-    -- from_ext_basis([2,3], [5,7]) = [2,3] + [5,7]·X = [2 + 7·7, 3 + 5] = [51, 8].
-    let fb = from_ext_basis([2, 3], [5, 7]);
-    assert_eq!(fb[0], 51);
-    assert_eq!(fb[1], 8);
-    1
-  }
 ⟧
 
 end MultiStark
diff --git a/KeccakCheck.lean b/KeccakCheck.lean
deleted file mode 100644
index e10d7adb..00000000
--- a/KeccakCheck.lean
+++ /dev/null
@@ -1,48 +0,0 @@
-import Ix.MultiStark.Keccak
-import Ix.Aiur.Compiler
-import Ix.Aiur.Protocol
-import Ix.Aiur.Statistics
-import Ix.Keccak
-
-open Aiur
-
-def hb (x : UInt8) : String := let s := String.ofList (Nat.toDigits 16 x.toNat); if s.length == 1 then "0" ++ s else s
-def toHex (b : ByteArray) : String := b.data.foldl (fun s x => s ++ hb x) ""
-
-def runK (c : CompiledToplevel) (input : ByteArray) : Except String ByteArray := do
-  let idx := c.getFuncIdx `keccak256_test |>.get!
-  let io : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] (input.data.map .ofUInt8)
-  match c.bytecode.execute idx #[] io with
-  | .error e => .error e
-  | .ok (out, _, _) => .ok ⟨out.map (fun g => UInt8.ofNat (g.n % 256))⟩
-
-def main : IO Unit := do
-  let compiled ← match MultiStark.keccakToplevel with
-    | .error g => IO.println s!"merge fail {g}"; return
-    | .ok t => match t.compile with
-      | .error e => IO.println s!"compile fail {e}"; return
-      | .ok c => pure c
-  IO.println "compiled ok"
-  let inputs : List ByteArray :=
-    [ "".toUTF8, "abc".toUTF8, "The quick brown fox jumps over the lazy dog".toUTF8,
-      (ByteArray.mk (Array.range 100 |>.map (fun n => UInt8.ofNat n))),
-      (ByteArray.mk (Array.range 135 |>.map (fun n => UInt8.ofNat n))),
-      (ByteArray.mk (Array.range 136 |>.map (fun n => UInt8.ofNat n))),
-      (ByteArray.mk (Array.range 200 |>.map (fun n => UInt8.ofNat n))),
-      (ByteArray.mk (Array.range 272 |>.map (fun n => UInt8.ofNat (n % 251)))) ]
-  for inp in inputs do
-    let expected := Keccak.hash inp
-    match runK compiled inp with
-    | .error e => IO.println s!"  [{inp.size}B] exec err: {e}"
-    | .ok got =>
-      let tag := if got == expected then "MATCH   " else "MISMATCH"
-      IO.println s!"  [{inp.size}B] {tag}  aiur={toHex got}  ref={toHex expected}"
-  -- Circuit statistics for one representative hash (272 bytes = 3 blocks).
-  let statInput := ByteArray.mk (Array.range 272 |>.map (fun n => UInt8.ofNat (n % 251)))
-  let idx := compiled.getFuncIdx `keccak256_test |>.get!
-  let io : IOBuffer := (default : IOBuffer).extend #[Aiur.G.ofNat 0] (statInput.data.map .ofUInt8)
-  match compiled.bytecode.execute idx #[] io with
-  | .error e => IO.println s!"stats exec err: {e}"
-  | .ok (_, _, queryCounts) =>
-    IO.println s!"\n── keccak256 circuit statistics ({statInput.size}-byte input, 3 blocks) ──"
-    Aiur.printStats (Aiur.computeStats compiled queryCounts)
diff --git a/lakefile.lean b/lakefile.lean
index 4af4b7e2..853c8191 100644
--- a/lakefile.lean
+++ b/lakefile.lean
@@ -102,10 +102,6 @@ lean_exe «recursive-verifier» where
   root := `RecursiveVerifier
   supportInterpreter := true
 
-lean_exe «keccak-check» where
-  root := `KeccakCheck
-  supportInterpreter := true
-
 end Tests
 
 section Benchmarks

From 3bc47023ea7b515faefc82953619f7b1e2aaf1a4 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Wed, 3 Jun 2026 15:29:32 -0300
Subject: [PATCH 17/28] PCS MMCS hash (sponge + compress)

---
 Ix/MultiStark/Pcs.lean | 183 ++++++++++++++++++++++++++++++++++++++---
 RecursiveVerifier.lean |   4 +
 2 files changed, 175 insertions(+), 12 deletions(-)

diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index c32c6e29..bee69076 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -1,19 +1,42 @@
 module
 public import Ix.Aiur.Meta
 public import Ix.MultiStark.Deserialize
+public import Ix.MultiStark.Keccak
 
 /-!
-# PCS verification stub
+# PCS (FRI) verification
 
 `multi-stark/src/verifier.rs` calls `pcs.verify(coms_to_verify, opening_proof,
-&mut challenger)` (a `TwoAdicFriPcs` FRI verification: query openings, Merkle
-authentication paths, FRI folding consistency). That is the heaviest part of
-the verifier and is **stubbed** here — `pcs_verify` accepts any opening proof
-unconditionally.
-
-Replacing this stub with a real FRI verifier (against the keccak Merkle commits
-and the Fiat-Shamir challenger) is the remaining work for a sound recursive
-verifier.
+&mut challenger)` — a `TwoAdicFriPcs` FRI verification: query openings, Merkle
+authentication paths, FRI folding consistency. This is the heaviest part of the
+verifier and is being ported here in stages.
+
+## Stage 1 — Merkle (MMCS) hash primitives (this file, done + validated)
+
+The input/commit-phase commitments are `MerkleTreeMmcs` over the keccak hasher
+configured in `multi-stark/src/types.rs`:
+
+* leaf hash  : `SerializingHasher<PaddingFreeSponge<KeccakF, 25, 17, 4>>`
+  — serialize each `Goldilocks` element to its canonical `u64`, then run a
+  padding-free **overwrite-mode** sponge (rate 17 lanes, capacity 8, output 4)
+  built on the keccak-f[1600] permutation (reused from `Keccak.lean`).
+* compression: `CompressionFunctionFromHasher<PaddingFreeSponge<…>, 2, 4>`
+  — hash the 8 lanes of two concatenated digests into a 4-lane digest.
+
+`PaddingFreeSponge` differs from keccak-256 (`Keccak.lean`): no `10*1` padding,
+rate 17 *lanes* (not 136 bytes), and each input block **overwrites** the rate
+region (keccak-256 XORs). A full block triggers a permute; a final partial block
+permutes once iff it absorbed ≥1 element; an empty trailing block does not
+permute.
+
+Validated against `multi-stark`'s own hasher (see `pcs_hash_test`, reference
+values from the `pcs_ref_values` test in `multi-stark/src/types.rs`).
+
+## Stage 2+ — TODO
+
+Merkle `verify_batch` (binary tree, multi-height injection), challenger
+threading, the FRI fold chain (`open_input` / `verify_query`), and the final
+polynomial check. Until those land, `pcs_verify` remains an accept-stub.
 -/
 
 public section
@@ -21,12 +44,148 @@ public section
 namespace MultiStark
 
 def pcs := ⟦
-  -- STUB: accept any FRI opening proof. Returns 1 ("accepted"). The argument is
-  -- ignored; a real implementation would check query openings, Merkle paths and
-  -- the FRI folding against the committed roots and sampled challenges.
+  -- ==========================================================================
+  -- PaddingFreeSponge<KeccakF, 25, 17, 4> in overwrite mode.
+  --
+  -- Lanes are the keccak `Lane` type (`&[U8; 8]`, LE). A `U64` opened value is
+  -- (for an honest, canonical proof) exactly the 8 LE bytes of a lane, so a lane
+  -- is just `store(u64)`. The 25-lane keccak `State` and `keccak_f_fold` are
+  -- reused from `Keccak.lean`.
+  -- ==========================================================================
+
+  -- Take one input element as the next rate lane, or keep the existing state
+  -- lane `dflt` when the input is exhausted. The flag is 1 iff an input element
+  -- was consumed (input is consumed front-to-back, so the per-block flags are a
+  -- run of 1s followed by 0s).
+  fn u64_pick(vals: List‹U64›, dflt: Lane) -> (Lane, List‹U64›, G) {
+    match load(vals) {
+      ListNode.Nil => (dflt, vals, 0),
+      ListNode.Cons(x, rest) => (store(x), rest, 1),
+    }
+  }
+
+  -- Absorb the input lanes into the state, 17 (= RATE) at a time, overwriting
+  -- the rate region. After a FULL block of 17, permute and continue. A final
+  -- partial block (1..16 elements) permutes once; an exactly-empty trailing
+  -- block does not permute (matches `PaddingFreeSponge::hash_iter`).
+  fn pf_absorb(sp: State, vals: List‹U64›) -> State {
+    let old = load(sp);
+    let (l0,  v1,  f0)  = u64_pick(vals, old[0]);
+    let (l1,  v2,  _f1) = u64_pick(v1,  old[1]);
+    let (l2,  v3,  _f2) = u64_pick(v2,  old[2]);
+    let (l3,  v4,  _f3) = u64_pick(v3,  old[3]);
+    let (l4,  v5,  _f4) = u64_pick(v4,  old[4]);
+    let (l5,  v6,  _f5) = u64_pick(v5,  old[5]);
+    let (l6,  v7,  _f6) = u64_pick(v6,  old[6]);
+    let (l7,  v8,  _f7) = u64_pick(v7,  old[7]);
+    let (l8,  v9,  _f8) = u64_pick(v8,  old[8]);
+    let (l9,  v10, _f9) = u64_pick(v9,  old[9]);
+    let (l10, v11, _fa) = u64_pick(v10, old[10]);
+    let (l11, v12, _fb) = u64_pick(v11, old[11]);
+    let (l12, v13, _fc) = u64_pick(v12, old[12]);
+    let (l13, v14, _fd) = u64_pick(v13, old[13]);
+    let (l14, v15, _fe) = u64_pick(v14, old[14]);
+    let (l15, v16, _ff) = u64_pick(v15, old[15]);
+    let (l16, v17, f16) = u64_pick(v16, old[16]);
+    let sp2 = store([l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13,
+                     l14, l15, l16, old[17], old[18], old[19], old[20], old[21],
+                     old[22], old[23], old[24]]);
+    match f16 {
+      1 => pf_absorb(keccak_f_fold(sp2, 24), v17),
+      _ => match f0 {
+        0 => sp2,
+        _ => keccak_f_fold(sp2, 24),
+      },
+    }
+  }
+
+  -- Hash a list of `u64` lanes (a serialized field-element row, or two
+  -- concatenated digests) into a 4-lane `Digest`.
+  fn pf_sponge_u64(vals: List‹U64›) -> Digest {
+    let z = store([0u8; 8]);
+    let init = store([z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z,
+                      z, z, z, z, z]);
+    let sp = pf_absorb(init, vals);
+    let s = load(sp);
+    [load(s[0]), load(s[1]), load(s[2]), load(s[3])]
+  }
+
+  -- The MMCS leaf hash of a single matrix row (`SerializingHasher` over the row
+  -- of canonical `u64`s). For a leaf joining several same-height matrices, the
+  -- rows are concatenated first (see `verify_batch`, future work).
+  fn mmcs_hash_row(row: List‹U64›) -> Digest {
+    pf_sponge_u64(row)
+  }
+
+  -- The MMCS 2-to-1 compression: hash the 8 lanes of two concatenated digests.
+  fn mmcs_compress(a: Digest, b: Digest) -> Digest {
+    let t0 = store(ListNode.Nil);
+    let t1 = store(ListNode.Cons(b[3], t0));
+    let t2 = store(ListNode.Cons(b[2], t1));
+    let t3 = store(ListNode.Cons(b[1], t2));
+    let t4 = store(ListNode.Cons(b[0], t3));
+    let t5 = store(ListNode.Cons(a[3], t4));
+    let t6 = store(ListNode.Cons(a[2], t5));
+    let t7 = store(ListNode.Cons(a[1], t6));
+    let t8 = store(ListNode.Cons(a[0], t7));
+    pf_sponge_u64(t8)
+  }
+
+  -- ==========================================================================
+  -- PCS verification entry (STILL A STUB).
+  -- Accepts any FRI opening proof. A real implementation will check query
+  -- openings, Merkle paths (via `mmcs_*` above) and the FRI folding against the
+  -- committed roots and the (threaded) Fiat-Shamir challenger.
+  -- ==========================================================================
   fn pcs_verify(opening: FriProof) -> G {
     1
   }
+
+  -- ==========================================================================
+  -- Self-test (validation): the keccak MMCS sponge/compression against
+  -- reference values from `multi-stark`'s own hasher (`pcs_ref_values`).
+  -- Compares each output lane mod p via `limb_to_field` (all reference lanes are
+  -- canonical, so this is exact).
+  -- ==========================================================================
+
+  -- A small single-byte value as a `U64` (8 LE bytes).
+  fn u64_of(b: U8) -> U64 {
+    [b, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]
+  }
+  -- `[i+1, i+2, …, n]` as a `List‹U64›`.
+  fn build_range(i: G, n: G) -> List‹U64› {
+    match n - i {
+      0 => store(ListNode.Nil),
+      _ => store(ListNode.Cons(u64_of(u8_from_field_unsafe(i + 1)), build_range(i + 1, n))),
+    }
+  }
+  fn assert_digest(d: Digest, e0: G, e1: G, e2: G, e3: G) -> G {
+    assert_eq!(limb_to_field(d[0]), e0);
+    assert_eq!(limb_to_field(d[1]), e1);
+    assert_eq!(limb_to_field(d[2]), e2);
+    assert_eq!(limb_to_field(d[3]), e3);
+    1
+  }
+  pub fn pcs_hash_test() -> G {
+    -- LEAF3: hash([1,2,3])
+    let d3 = mmcs_hash_row(build_range(0, 3));
+    assert_eq!(assert_digest(d3, 0xc55a6a1beaea9fec, 0xc8f0dbc4c59ec440,
+                                 0xacb1295de9bfe032, 0x445d569d3dfc9543), 1);
+    -- LEAF17: exactly one full block, no extra permute.
+    let d17 = mmcs_hash_row(build_range(0, 17));
+    assert_eq!(assert_digest(d17, 0x388da73622e8fdd5, 0xec687be9c50d2218,
+                                  0x528d145dfe6571af, 0xd2eb808dfba4703c), 1);
+    -- LEAF20: full block + 3-element partial (two permutes).
+    let d20 = mmcs_hash_row(build_range(0, 20));
+    assert_eq!(assert_digest(d20, 0xec696847be88d358, 0x202861c67ff4cec8,
+                                  0x88e006a48aaa0661, 0xabaddb9d32ecd024), 1);
+    -- COMPRESS([1,2,3,4],[5,6,7,8])
+    let c = mmcs_compress([u64_of(1u8), u64_of(2u8), u64_of(3u8), u64_of(4u8)],
+                          [u64_of(5u8), u64_of(6u8), u64_of(7u8), u64_of(8u8)]);
+    assert_eq!(assert_digest(c, 0xda1ef0642722b22e, 0x4851efdbdb2a2fd8,
+                                0x37e8ff900ea95d47, 0xa153eee7805376fb), 1);
+    1
+  }
 ⟧
 
 end MultiStark
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 87b2aeb4..11679083 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -127,6 +127,10 @@ def main : IO UInt32 := do
     | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1
     | .ok t => pure t
   let (vCompiled, _vSystem) ← buildSystem "verifier" vTop
+  -- TEMP: validate the PCS keccak-MMCS sponge primitives.
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_hash_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ pcs_hash_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ pcs_hash_test: keccak MMCS sponge/compress match reference"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From 911a43d9a748674e3a3151ec3c8ae66ee09353dc Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Wed, 3 Jun 2026 16:35:27 -0300
Subject: [PATCH 18/28] PCS Merkle verify_batch

---
 Ix/MultiStark/Pcs.lean | 165 +++++++++++++++++++++++++++++++++++++++++
 RecursiveVerifier.lean |   5 +-
 2 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index bee69076..fcd33c6d 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -131,6 +131,122 @@ def pcs := ⟦
     pf_sponge_u64(t8)
   }
 
+  -- ==========================================================================
+  -- Merkle MMCS `verify_batch` (binary tree, DIGEST_ELEMS = 4).
+  --
+  -- Ports `multi-stark/Plonky3/merkle-tree/src/mmcs.rs::verify_batch` for the
+  -- binary (N = 2) case. All committed matrices have power-of-two heights, so a
+  -- matrix's height is `2^log_height`. The opened rows arrive in matrix order;
+  -- `lhs` is the matching list of per-matrix log-heights. The query `index` is
+  -- threaded as a bit list (LSB first = leaf→root path) to avoid field division.
+  --
+  -- The leaf hash joins all matrices at the maximum log-height. Walking down,
+  -- each level folds with one proof sibling (ordered by the path bit), then —
+  -- if any matrix lives at the new log-height — injects that matrix group's leaf
+  -- hash via a second compression (this consumes no proof sibling), exactly as
+  -- the Rust loop's `next_height_openings_digest` injection.
+  -- ==========================================================================
+
+  -- 1 iff two digests are equal (compared as field elements; hash outputs are
+  -- canonical so this is exact).
+  fn digest_eq(a: Digest, b: Digest) -> G {
+    eq_zero(limb_to_field(a[0]) - limb_to_field(b[0])) *
+    eq_zero(limb_to_field(a[1]) - limb_to_field(b[1])) *
+    eq_zero(limb_to_field(a[2]) - limb_to_field(b[2])) *
+    eq_zero(limb_to_field(a[3]) - limb_to_field(b[3]))
+  }
+
+  -- Compress (current, sibling) in path order: path bit 0 ⇒ current is the left
+  -- child, bit 1 ⇒ current is the right child.
+  fn compress_ordered(bit: G, d: Digest, s: Digest) -> Digest {
+    match bit {
+      0 => mmcs_compress(d, s),
+      _ => mmcs_compress(s, d),
+    }
+  }
+
+  -- 1 iff some matrix has log-height `target`.
+  fn has_height(lhs: List‹G›, target: G) -> G {
+    match load(lhs) {
+      ListNode.Nil => 0,
+      ListNode.Cons(h, rest) =>
+        match eq_zero(h - target) {
+          1 => 1,
+          _ => has_height(rest, target),
+        },
+    }
+  }
+
+  -- Concatenate the opened rows of every matrix whose log-height is `target`
+  -- (in matrix order — the stable height-sort preserves it), for the joint leaf
+  -- hash `hash_iter_slices`.
+  fn concat_at(rows: List‹List‹U64››, lhs: List‹G›, target: G) -> List‹U64› {
+    match load(rows) {
+      ListNode.Nil => store(ListNode.Nil),
+      ListNode.Cons(r, rrest) =>
+        let &ListNode.Cons(lh, lrest) = lhs;
+        concat_at_step(eq_zero(lh - target), r, rrest, lrest, target),
+    }
+  }
+  fn concat_at_step(hit: G, r: List‹U64›, rrest: List‹List‹U64››,
+      lrest: List‹G›, target: G) -> List‹U64› {
+    match hit {
+      0 => concat_at(rrest, lrest, target),
+      _ => list_concat(r, concat_at(rrest, lrest, target)),
+    }
+  }
+
+  -- The joint leaf hash of all matrices at log-height `target`.
+  fn leaf_hash_at(rows: List‹List‹U64››, lhs: List‹G›, target: G) -> Digest {
+    pf_sponge_u64(concat_at(rows, lhs, target))
+  }
+
+  -- Inject the leaf hash of any matrices at log-height `lh` (if present) via a
+  -- second compression onto `d`.
+  fn inject_maybe(rows: List‹List‹U64››, lhs: List‹G›, lh: G, d: Digest) -> Digest {
+    match has_height(lhs, lh) {
+      0 => d,
+      _ => mmcs_compress(d, leaf_hash_at(rows, lhs, lh)),
+    }
+  }
+
+  -- Recompose remaining path bits (LSB first) into the cap index.
+  fn bits_to_num(bits: List‹G›) -> G {
+    match load(bits) {
+      ListNode.Nil => 0,
+      ListNode.Cons(b, rest) => b + 2 * bits_to_num(rest),
+    }
+  }
+
+  -- Walk the authentication path: one proof sibling per level (fold), with a
+  -- possible leaf injection at the new log-height `lh`. Returns the recomputed
+  -- root and the leftover cap index.
+  fn mmcs_fold(d: Digest, rows: List‹List‹U64››, lhs: List‹G›,
+      proof: List‹Digest›, ibits: List‹G›, lh: G) -> (Digest, G) {
+    match load(proof) {
+      ListNode.Nil => (d, bits_to_num(ibits)),
+      ListNode.Cons(s, prest) =>
+        let &ListNode.Cons(bit, brest) = ibits;
+        let d1 = compress_ordered(bit, d, s);
+        let d2 = inject_maybe(rows, lhs, lh, d1);
+        mmcs_fold(d2, rows, lhs, prest, brest, lh - 1),
+    }
+  }
+
+  -- Recompute the Merkle root from the opened rows + authentication path.
+  fn mmcs_root(rows: List‹List‹U64››, lhs: List‹G›, ibits: List‹G›,
+      proof: List‹Digest›, log_max: G) -> (Digest, G) {
+    let leaf = leaf_hash_at(rows, lhs, log_max);
+    mmcs_fold(leaf, rows, lhs, proof, ibits, log_max - 1)
+  }
+
+  -- 1 iff the recomputed root matches the commitment cap at the cap index.
+  fn mmcs_verify(cap: MerkleCap, rows: List‹List‹U64››, lhs: List‹G›,
+      ibits: List‹G›, proof: List‹Digest›, log_max: G) -> G {
+    let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, log_max);
+    digest_eq(list_lookup(cap, capidx), root)
+  }
+
   -- ==========================================================================
   -- PCS verification entry (STILL A STUB).
   -- Accepts any FRI opening proof. A real implementation will check query
@@ -186,6 +302,55 @@ def pcs := ⟦
                                 0x37e8ff900ea95d47, 0xa153eee7805376fb), 1);
     1
   }
+
+  -- Merkle `verify_batch` self-test against the `pcs_merkle_ref` reference: a
+  -- cap-height-0 tree over 3 matrices of heights 8/4/2 (log-heights 3/2/1),
+  -- opened at index 5 (path bits 1,0,1). Checks the recomputed root matches the
+  -- committed root and that the cap index is 0, then that a tampered opened row
+  -- yields a different (rejected) root.
+  pub fn pcs_merkle_test() -> G {
+    -- opened rows (matrix order): m0 row5, m1 row2, m2 row1.
+    let row0 = store(ListNode.Cons(u64_of(11u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
+    let row1 = store(ListNode.Cons(u64_of(107u8), store(ListNode.Cons(u64_of(108u8),
+                 store(ListNode.Cons(u64_of(109u8), store(ListNode.Nil)))))));
+    let row2 = store(ListNode.Cons(u64_of(202u8), store(ListNode.Nil)));
+    let rows = store(ListNode.Cons(row0, store(ListNode.Cons(row1,
+                 store(ListNode.Cons(row2, store(ListNode.Nil)))))));
+    -- log-heights and path bits (index 5 = 0b101, LSB first).
+    let lhs = store(ListNode.Cons(3, store(ListNode.Cons(2, store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let ibits = store(ListNode.Cons(1, store(ListNode.Cons(0, store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    -- authentication path SIB0, SIB1, SIB2 (each a Digest = [U64; 4]).
+    let sib0 = [[9u8, 36u8, 179u8, 127u8, 205u8, 83u8, 105u8, 203u8],
+                [95u8, 229u8, 105u8, 223u8, 113u8, 55u8, 97u8, 122u8],
+                [135u8, 8u8, 65u8, 248u8, 163u8, 163u8, 68u8, 81u8],
+                [9u8, 11u8, 20u8, 209u8, 10u8, 168u8, 151u8, 125u8]];
+    let sib1 = [[227u8, 58u8, 255u8, 213u8, 77u8, 152u8, 42u8, 77u8],
+                [113u8, 86u8, 2u8, 151u8, 97u8, 63u8, 58u8, 45u8],
+                [228u8, 139u8, 228u8, 194u8, 182u8, 115u8, 107u8, 221u8],
+                [248u8, 16u8, 30u8, 93u8, 176u8, 36u8, 205u8, 88u8]];
+    let sib2 = [[236u8, 144u8, 115u8, 218u8, 140u8, 5u8, 86u8, 229u8],
+                [95u8, 186u8, 252u8, 175u8, 21u8, 247u8, 153u8, 25u8],
+                [113u8, 78u8, 92u8, 200u8, 212u8, 175u8, 247u8, 47u8],
+                [78u8, 145u8, 206u8, 54u8, 175u8, 155u8, 165u8, 206u8]];
+    let proof = store(ListNode.Cons(sib0, store(ListNode.Cons(sib1,
+                  store(ListNode.Cons(sib2, store(ListNode.Nil)))))));
+    let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, 3);
+    assert_eq!(capidx, 0);
+    assert_eq!(assert_digest(root, 0x6211b9a1a116a006, 0x435ee98e1504880f,
+                                   0x900c7274b9a215f, 0xf6e3aaac5dcd90bd), 1);
+    -- tamper: perturb m0's first opened value → root must change.
+    let bad0 = store(ListNode.Cons(u64_of(99u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
+    let bad_rows = store(ListNode.Cons(bad0, store(ListNode.Cons(row1,
+                     store(ListNode.Cons(row2, store(ListNode.Nil)))))));
+    let cap = store(ListNode.Cons([[6u8, 160u8, 22u8, 161u8, 161u8, 185u8, 17u8, 98u8],
+                                   [15u8, 136u8, 4u8, 21u8, 142u8, 233u8, 94u8, 67u8],
+                                   [95u8, 33u8, 154u8, 75u8, 39u8, 199u8, 0u8, 9u8],
+                                   [189u8, 144u8, 205u8, 93u8, 172u8, 170u8, 227u8, 246u8]],
+                    store(ListNode.Nil)));
+    assert_eq!(mmcs_verify(cap, rows, lhs, ibits, proof, 3), 1);
+    assert_eq!(mmcs_verify(cap, bad_rows, lhs, ibits, proof, 3), 0);
+    1
+  }
 ⟧
 
 end MultiStark
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 11679083..72786059 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -127,10 +127,13 @@ def main : IO UInt32 := do
     | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1
     | .ok t => pure t
   let (vCompiled, _vSystem) ← buildSystem "verifier" vTop
-  -- TEMP: validate the PCS keccak-MMCS sponge primitives.
+  -- TEMP: validate the PCS keccak-MMCS sponge primitives + Merkle verify_batch.
   match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_hash_test).get! #[] default with
   | .error e => IO.eprintln s!"✗ pcs_hash_test FAILED — {e}"; return 1
   | .ok _ => IO.println "✓ pcs_hash_test: keccak MMCS sponge/compress match reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_merkle_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ pcs_merkle_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ pcs_merkle_test: Merkle verify_batch matches reference (root + tamper)"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From 4ceaa4e2744d930898f8c5f9b53c88d7b0b67123 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Wed, 3 Jun 2026 16:55:53 -0300
Subject: [PATCH 19/28] PCS challenger sample_bits

---
 Ix/MultiStark/Verifier.lean | 41 +++++++++++++++++++++++++++++++++++++
 RecursiveVerifier.lean      |  3 +++
 2 files changed, 44 insertions(+)

diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index 09c9b555..b2eb2885 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -168,6 +168,47 @@ def verifier := ⟦
     (c0, c1, i1, o1)
   }
 
+  -- `sample_bits(n)` (FRI query index). `SerializingChallenger64::sample_bits`
+  -- reads one 8-byte sample as a little-endian u64 and masks the low `n` bits.
+  -- We return the low `n` bits as a list (LSB first = the leaf→root Merkle/FRI
+  -- path), built from the 8 sampled bytes' bit decompositions (reusing keccak's
+  -- `cons8`), truncated to `n`.
+  fn sample8_bits(bytes: [U8; 8]) -> List‹G› {
+    cons8(u8_bit_decomposition(bytes[0]),
+    cons8(u8_bit_decomposition(bytes[1]),
+    cons8(u8_bit_decomposition(bytes[2]),
+    cons8(u8_bit_decomposition(bytes[3]),
+    cons8(u8_bit_decomposition(bytes[4]),
+    cons8(u8_bit_decomposition(bytes[5]),
+    cons8(u8_bit_decomposition(bytes[6]),
+    cons8(u8_bit_decomposition(bytes[7]), store(ListNode.Nil)))))))))
+  }
+  fn take_bits(bits: List‹G›, n: G) -> List‹G› {
+    match n {
+      0 => store(ListNode.Nil),
+      _ =>
+        let &ListNode.Cons(b, rest) = bits;
+        store(ListNode.Cons(b, take_bits(rest, n - 1))),
+    }
+  }
+  fn ch_sample_bits(input: ByteStream, output: ByteStream, n: G)
+      -> (List‹G›, ByteStream, ByteStream) {
+    let (bytes, i1, o1) = ch_sample8(input, output);
+    (take_bits(sample8_bits(bytes), n), i1, o1)
+  }
+
+  -- Self-test: `sample_bits(20)` after observing the single `Val`
+  -- `0x0102030405060708` (8 LE bytes) must equal the reference `799146`
+  -- (`pcs_challenger_ref` in `multi-stark/src/types.rs`).
+  pub fn sample_bits_test() -> G {
+    let input = store(ListNode.Cons(8u8, store(ListNode.Cons(7u8, store(ListNode.Cons(6u8,
+      store(ListNode.Cons(5u8, store(ListNode.Cons(4u8, store(ListNode.Cons(3u8,
+      store(ListNode.Cons(2u8, store(ListNode.Cons(1u8, store(ListNode.Nil)))))))))))))))));
+    let (bits, _i, _o) = ch_sample_bits(input, store(ListNode.Nil), 20);
+    assert_eq!(bits_to_num(bits), 799146);
+    1
+  }
+
   -- Append (observe) 8 little-endian bytes of `b` at the END of the challenger
   -- input buffer. The transcript is held front-to-back (front = first observed =
   -- first hashed, matching `keccak256`'s absorption order), so an observation
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 72786059..959e5d6d 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -134,6 +134,9 @@ def main : IO UInt32 := do
   match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_merkle_test).get! #[] default with
   | .error e => IO.eprintln s!"✗ pcs_merkle_test FAILED — {e}"; return 1
   | .ok _ => IO.println "✓ pcs_merkle_test: Merkle verify_batch matches reference (root + tamper)"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `sample_bits_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ sample_bits_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ sample_bits_test: challenger sample_bits matches reference"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From 6c04ee0b68cffd3948b95369bf7d19810c135935 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Fri, 5 Jun 2026 12:45:15 -0300
Subject: [PATCH 20/28] PCS challenger continuation

---
 Ix/MultiStark/Verifier.lean | 73 ++++++++++++++++++++++++++++++++++---
 RecursiveVerifier.lean      |  3 ++
 2 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index b2eb2885..088d4a69 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -221,6 +221,64 @@ def verifier := ⟦
     list_concat(input, cap_onto(cap, store(ListNode.Nil)))
   }
 
+  -- ==========================================================================
+  -- PCS challenger continuation (Phase 4): the post-ζ transcript that
+  -- `two_adic_pcs::verify` + `verify_fri` replay. Unlike `fiat_shamir` — where
+  -- every sample is followed by an observe (so each sample re-flushes from an
+  -- empty `output`) — the PCS phase has *consecutive* samples with no observe
+  -- between (the PCS batch challenge α, then immediately the FRI batch challenge
+  -- α). So both challenger buffers must be threaded: `output` carries the
+  -- leftover hash bytes from one sample into the next instead of re-flushing.
+  -- ==========================================================================
+
+  -- Observe one `Val` (8 LE bytes): append to `input`, CLEAR `output` (any
+  -- leftover sampled bytes are discarded), per `HashChallenger::observe`.
+  fn ch_observe_val(input: ByteStream, v: U64) -> (ByteStream, ByteStream) {
+    (snoc_b8(input, v), store(ListNode.Nil))
+  }
+
+  -- Sample a degree-2 extension element, threading BOTH challenger buffers so a
+  -- following consecutive sample continues from the same hash `output` stream
+  -- (no re-flush). Limbs are reduced mod p (`limb_to_field`); the ~2⁻³²
+  -- rejection band where a raw limb ≥ p is still unhandled (as in `fiat_shamir`).
+  fn pcs_sample_ext(input: ByteStream, output: ByteStream)
+      -> (Ext, ByteStream, ByteStream) {
+    let (c0, c1, i1, o1) = ch_sample_ext(input, output);
+    ([limb_to_field(c0), limb_to_field(c1)], i1, o1)
+  }
+
+  -- Self-test: replay the synthetic PCS challenger sequence from
+  -- `pcs_challenger4_ref` (multi-stark/src/types.rs) and check every sampled
+  -- challenge against the reference. Decisively exercises the two consecutive
+  -- ext samples (α_pcs then α_fri) sharing one hash `output` stream.
+  pub fn pcs_challenger4_test() -> G {
+    -- post-ζ input buffer = observe V0 then V1 (forward / observation order).
+    let v0 = [8u8, 7u8, 6u8, 5u8, 4u8, 3u8, 2u8, 1u8];     -- 0x0102030405060708
+    let v1 = [136u8, 119u8, 102u8, 85u8, 68u8, 51u8, 34u8, 17u8]; -- 0x1122334455667788
+    let input = snoc_b8(snoc_b8(store(ListNode.Nil), v0), v1);
+    -- α_pcs (output empty ⇒ flush), then α_fri (CONSECUTIVE ⇒ thread output).
+    let (apcs, input, o1) = pcs_sample_ext(input, store(ListNode.Nil));
+    let (afri, input, o2) = pcs_sample_ext(input, o1);
+    assert_eq!(apcs[0], 2882912772410685996);
+    assert_eq!(apcs[1], 910933442133595775);
+    assert_eq!(afri[0], 14440140149289897216);
+    assert_eq!(afri[1], 8092267645441512944);
+    -- observe commit (clears output), sample β.
+    let v2 = [239u8, 190u8, 173u8, 222u8, 0u8, 0u8, 0u8, 0u8]; -- 0x00000000deadbeef
+    let (input, _oc) = ch_observe_val(input, v2);
+    let (beta, input, _ob) = pcs_sample_ext(input, store(ListNode.Nil));
+    assert_eq!(beta[0], 10456048119516576995);
+    assert_eq!(beta[1], 3173538015651228593);
+    -- observe final_poly coeff + log_arity (each a Val), then sample the index.
+    let v3 = [4u8, 3u8, 2u8, 1u8, 13u8, 12u8, 11u8, 10u8]; -- 0x0a0b0c0d01020304
+    let v4 = [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];     -- 0x0000000000000002
+    let (input, _o3) = ch_observe_val(input, v3);
+    let (input, _o4) = ch_observe_val(input, v4);
+    let (bits, _bi, _bo) = ch_sample_bits(input, store(ListNode.Nil), 20);
+    assert_eq!(bits_to_num(bits), 336138);
+    1
+  }
+
   -- Append a claim's values (each `Val` as 8 LE bytes) onto `tail`, in order.
   fn claim_vals_onto(vals: List‹U64›, tail: ByteStream) -> ByteStream {
     match load(vals) {
@@ -257,8 +315,12 @@ def verifier := ⟦
   -- NOTE: the ~2⁻³² rejection band (a sampled 8-byte limb ≥ p) is still not
   -- handled (`ch_sample8` reduces mod p); for small proofs no sample lands in
   -- the band, but honest rejection sampling remains a TODO for full generality.
+  -- Also returns the post-ζ challenger `input` buffer, which the PCS phase
+  -- (Phase 4+) continues observing into. The leftover `output` after the ζ
+  -- sample is discarded — the next challenger op is an observe (of the opened
+  -- values), which clears `output` anyway.
   fn fiat_shamir(prep: MerkleCap, s1: MerkleCap, s2: MerkleCap, q: MerkleCap,
-      lds: List‹U8›, claims: List‹List‹U64››) -> (Ext, Ext, Ext, Ext) {
+      lds: List‹U8›, claims: List‹List‹U64››) -> (Ext, Ext, Ext, Ext, ByteStream) {
     -- Initial transcript, front-to-back: prep, stage_1, log_degrees, claims.
     -- Built inner-to-outer with the prepend helpers so the result is in
     -- forward (observation) order.
@@ -278,12 +340,13 @@ def verifier := ⟦
     let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil));
     -- observe quotient commitment
     let input = snoc_cap(input, q);
-    -- sample out-of-domain point ζ
-    let (z0, z1, _input, _oz) = ch_sample_ext(input, store(ListNode.Nil));
+    -- sample out-of-domain point ζ; keep the resulting `input` for the PCS phase
+    let (z0, z1, zinput, _oz) = ch_sample_ext(input, store(ListNode.Nil));
     ([limb_to_field(l0), limb_to_field(l1)],
      [limb_to_field(f0), limb_to_field(f1)],
      [limb_to_field(a0), limb_to_field(a1)],
-     [limb_to_field(z0), limb_to_field(z1)])
+     [limb_to_field(z0), limb_to_field(z1)],
+     zinput)
   }
 
   -- ==========================================================================
@@ -738,7 +801,7 @@ def verifier := ⟦
                q_opened, prep_opt, stage1, stage2) =>
         let Commitments.Mk(s1c, s2c, qc) = commitments;
         let prep_cap = opt_commit_cap(commit);
-        let (lch, fch, alpha, zeta) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
+        let (lch, fch, alpha, zeta, _post_zeta_input) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
         let acc0 = claims_acc([0, 0], claims, lch, fch);
         ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
                  prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta),
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 959e5d6d..d3fa989f 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -137,6 +137,9 @@ def main : IO UInt32 := do
   match vCompiled.bytecode.execute (vCompiled.getFuncIdx `sample_bits_test).get! #[] default with
   | .error e => IO.eprintln s!"✗ sample_bits_test FAILED — {e}"; return 1
   | .ok _ => IO.println "✓ sample_bits_test: challenger sample_bits matches reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_challenger4_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ pcs_challenger4_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ pcs_challenger4_test: PCS challenger continuation (α_pcs/α_fri/β/index) matches reference"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From e9aaf244a4ae8277b23e147d5eb892a5210e67f5 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Fri, 5 Jun 2026 13:48:01 -0300
Subject: [PATCH 21/28] PCS FRI fold_row and reduced openings

---
 Ix/MultiStark/Pcs.lean | 110 +++++++++++++++++++++++++++++++++++++++++
 RecursiveVerifier.lean |   6 +++
 2 files changed, 116 insertions(+)

diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index fcd33c6d..9138f71a 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -247,6 +247,116 @@ def pcs := ⟦
     digest_eq(list_lookup(cap, capidx), root)
   }
 
+  -- ==========================================================================
+  -- FRI fold step (`TwoAdicFriFolding::fold_row`, arity-2 case).
+  --
+  -- `innerFri` uses `maxLogArity = 1`, so every FRI fold is binary. Ports
+  -- `fold_row` for `log_arity = 1`: given the sibling pair (e0, e1) of a node
+  -- and the round challenge β,
+  --   folded = (e0 + e1)/2 + β·(e0 − e1)/(2s),
+  --   s = g_{log_height+1}^{reverse_bits_len(index, log_height)}     (base field)
+  -- where `g_k = two_adic_gen(k)`. The index is threaded as its low-`log_height`
+  -- bit list (LSB first), matching `ch_sample_bits`; `reverse_bits_len` is then
+  -- just reversing that list.
+  -- ==========================================================================
+
+  -- Reverse a `G` (bit) list onto `acc`.
+  fn glist_rev(l: List‹G›, acc: List‹G›) -> List‹G› {
+    match load(l) {
+      ListNode.Nil => acc,
+      ListNode.Cons(b, rest) => glist_rev(rest, store(ListNode.Cons(b, acc))),
+    }
+  }
+
+  -- base^(Σ bits_i · 2^i), bits LSB-first (square-and-multiply over the bits).
+  fn exp_by_bits(base: G, bits: List‹G›) -> G {
+    match load(bits) {
+      ListNode.Nil => 1,
+      ListNode.Cons(b, rest) =>
+        let half = exp_by_bits(base * base, rest);
+        match b {
+          0 => half,
+          _ => base * half,
+        },
+    }
+  }
+
+  -- The arity-2 FRI fold. `index_bits` = the low `log_height` index bits, LSB
+  -- first (so `reverse_bits_len` = reversing the list).
+  fn fri_fold2(index_bits: List‹G›, log_height: G, beta: Ext, e0: Ext, e1: Ext) -> Ext {
+    let g = two_adic_gen(log_height + 1);
+    let s = exp_by_bits(g, glist_rev(index_bits, store(ListNode.Nil)));
+    let two_s = 2 * s;
+    let t1 = ext_div(ext_add(e0, e1), [2, 0]);
+    let t2 = ext_mul(beta, ext_div(ext_sub(e0, e1), [two_s, 0]));
+    ext_add(t1, t2)
+  }
+
+  -- Self-test: arity-2 fold at index 5, log_height 3 vs the `fri_fold_ref`
+  -- reference (computed by the real `TwoAdicFriFolding::fold_row`).
+  pub fn fri_fold_test() -> G {
+    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
+                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let e0 = [0x1111111111111111, 0x2222222222222222];
+    let e1 = [0x3333333333333333, 0x4444444444444444];
+    let beta = [0x5555555555555555, 0x6666666666666666];
+    let folded = fri_fold2(index_bits, 3, beta, e0, e1);
+    assert_eq!(folded[0], 9349172584842537206);
+    assert_eq!(folded[1], 984486879173118962);
+    1
+  }
+
+  -- ==========================================================================
+  -- `open_input` reduced openings (`fri/verifier.rs::open_input` inner loop).
+  --
+  -- For a matrix opened at a point z with verifier domain point x, accumulate
+  -- over the matrix columns:
+  --   ro += alpha_pow · (p_z − p_x) · q ;  alpha_pow *= alpha,   q = 1/(z − x)
+  -- where p_x are the INPUT opened base values (from the query's batch opening,
+  -- authenticated by the input MMCS) and p_z the OOD opened ext values. The
+  -- query domain point is
+  --   x = GENERATOR(7) · two_adic_gen(log_height)^{reverse_bits_len(idx, log_height)}.
+  -- All extension arithmetic — no Merkle hashing here.
+  -- ==========================================================================
+
+  -- The base-field query domain point x. `index_bits` = low-`log_height` index
+  -- bits, LSB first (so reverse_bits_len = reversing the list).
+  fn ro_x(index_bits: List‹G›, log_height: G) -> G {
+    7 * exp_by_bits(two_adic_gen(log_height), glist_rev(index_bits, store(ListNode.Nil)))
+  }
+
+  -- Accumulate one matrix-point's column contributions. `q = 1/(z − x)`.
+  fn ro_fold(p_x: List‹G›, p_z: List‹Ext›, q: Ext, alpha: Ext, ro: Ext, ap: Ext)
+      -> (Ext, Ext) {
+    match load(p_x) {
+      ListNode.Nil => (ro, ap),
+      ListNode.Cons(px, pxr) =>
+        let &ListNode.Cons(pz, pzr) = p_z;
+        let term = ext_mul(ext_mul(ap, ext_sub(pz, [px, 0])), q);
+        ro_fold(pxr, pzr, q, alpha, ext_add(ro, term), ext_mul(ap, alpha)),
+    }
+  }
+
+  -- Self-test vs `ro_fold_ref`: x at index 5 / log_height 3, then accumulate
+  -- (p_z − p_x)/(z − x) over 3 columns with alpha powers.
+  pub fn ro_fold_test() -> G {
+    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
+                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let x = ro_x(index_bits, 3);
+    assert_eq!(x, 117440512);
+    let z = [0x123456789a, 0xabcdef01];
+    let alpha = [0x1111111111111111, 0x2];
+    let p_x = store(ListNode.Cons(11, store(ListNode.Cons(22,
+                store(ListNode.Cons(33, store(ListNode.Nil)))))));
+    let p_z = store(ListNode.Cons([100, 1], store(ListNode.Cons([200, 2],
+                store(ListNode.Cons([300, 3], store(ListNode.Nil)))))));
+    let q = ext_inverse(ext_sub(z, [x, 0]));
+    let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [0, 0], [1, 0]);
+    assert_eq!(ro[0], 7130765474285082575);
+    assert_eq!(ro[1], 12254464995725315436);
+    1
+  }
+
   -- ==========================================================================
   -- PCS verification entry (STILL A STUB).
   -- Accepts any FRI opening proof. A real implementation will check query
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index d3fa989f..8162447a 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -140,6 +140,12 @@ def main : IO UInt32 := do
   match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_challenger4_test).get! #[] default with
   | .error e => IO.eprintln s!"✗ pcs_challenger4_test FAILED — {e}"; return 1
   | .ok _ => IO.println "✓ pcs_challenger4_test: PCS challenger continuation (α_pcs/α_fri/β/index) matches reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `fri_fold_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ fri_fold_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ fri_fold_test: FRI arity-2 fold_row matches reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `ro_fold_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ ro_fold_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ ro_fold_test: open_input reduced-opening math matches reference"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From eacc7bd85282a317ab9c87028bfd83f1637f4726 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 8 Jun 2026 13:29:09 -0300
Subject: [PATCH 22/28] Goldilocks non-native arithmetic

---
 Ix/MultiStark.lean            |   2 +
 Ix/MultiStark/Goldilocks.lean | 354 ++++++++++++++++++++++++++++++++++
 RecursiveVerifier.lean        |  10 +
 3 files changed, 366 insertions(+)
 create mode 100644 Ix/MultiStark/Goldilocks.lean

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index e1e72c4f..b7612877 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -2,6 +2,7 @@ module
 public import Ix.Aiur.Meta
 public import Ix.IxVM.Core
 public import Ix.IxVM.ByteStream
+public import Ix.MultiStark.Goldilocks
 public import Ix.MultiStark.Deserialize
 public import Ix.MultiStark.Keccak
 public import Ix.MultiStark.Pcs
@@ -69,6 +70,7 @@ def entrypoints := ⟦
 the keccak-256 implementation, and the entrypoint. -/
 def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
   let t ← IxVM.core.merge IxVM.byteStream
+  let t ← t.merge MultiStark.goldilocks
   let t ← t.merge deserialize
   let t ← t.merge keccak
   let t ← t.merge systemDeserialize
diff --git a/Ix/MultiStark/Goldilocks.lean b/Ix/MultiStark/Goldilocks.lean
new file mode 100644
index 00000000..7e1381d4
--- /dev/null
+++ b/Ix/MultiStark/Goldilocks.lean
@@ -0,0 +1,354 @@
+module
+public import Ix.Aiur.Meta
+
+/-!
+# Non-native Goldilocks field arithmetic (`Goldilocks = [U8; 8]`)
+
+The recursive verifier computes on the *inner* proof's field (Goldilocks),
+but Aiur compiles the verifier to its *own* (outer) field — which is **not
+always Goldilocks** (it may be the BLS scalar field). So inner-field arithmetic
+cannot use Aiur's native `+`/`*` (those compute in the outer field); it must be
+**emulated** on bytes.
+
+A `Goldilocks` element is its canonical value `< p = 2⁶⁴ − 2³² + 1` held as 8
+little-endian bytes (`[U8; 8]`), and `ExtGoldilocks = [Goldilocks; 2]` is the
+degree-2 extension `𝔽_p[X]/(X² − 7)`. All ops are built from the u8 gadgets
+(`u8_add`/`u8_sub`/`u8_mul` give full carry/borrow/16-bit-product), so they are
+field-agnostic and produce bytes directly (no field→byte decomposition needed).
+
+`EPSILON = 2³² − 1`, and `2⁶⁴ ≡ EPSILON (mod p)`, the basis of the reductions.
+
+Validated byte-for-byte against `multi-stark`'s native Goldilocks (`gl_ops_ref`
+in `multi-stark/src/types.rs`).
+-/
+
+public section
+
+namespace MultiStark
+
+def goldilocks := ⟦
+  type Goldilocks = [U8; 8]
+  type ExtGoldilocks = [Goldilocks; 2]
+
+  -- p = 2⁶⁴ − 2³² + 1 and EPSILON = 2³² − 1, little-endian.
+  fn gl_p() -> Goldilocks { [1u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8] }
+  fn gl_eps() -> Goldilocks { [255u8, 255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 0u8] }
+  fn gl_zero() -> Goldilocks { [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }
+  fn gl_one() -> Goldilocks { [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }
+
+  -- ==========================================================================
+  -- Byte-vector primitives (carry/borrow chains over the u8 gadgets).
+  -- ==========================================================================
+
+  -- Add two bytes with a carry-in (0/1). Returns (sum byte, carry-out 0/1).
+  -- `u8_add` adds only two bytes; the carry-in is folded with a second add. At
+  -- most one of the two overflow flags fires, so the carry-out stays 0/1.
+  fn adc(a: U8, b: U8, cin: G) -> (U8, G) {
+    let (s1, o1) = u8_add(a, b);
+    let (s2, o2) = u8_add(s1, u8_from_field_unsafe(cin));
+    (s2, to_field(o1) + to_field(o2))
+  }
+
+  -- Subtract two bytes with a borrow-in (0/1). Returns (diff byte, borrow-out).
+  fn sbb(a: U8, b: U8, bin: G) -> (U8, G) {
+    let (d1, w1) = u8_sub(a, b);
+    let (d2, w2) = u8_sub(d1, u8_from_field_unsafe(bin));
+    (d2, to_field(w1) + to_field(w2))
+  }
+
+  -- 8-byte (64-bit) add. Returns the 8-byte sum and the final carry (0/1).
+  fn add8(a: Goldilocks, b: Goldilocks) -> (Goldilocks, G) {
+    let (r0, c0) = adc(a[0], b[0], 0);
+    let (r1, c1) = adc(a[1], b[1], c0);
+    let (r2, c2) = adc(a[2], b[2], c1);
+    let (r3, c3) = adc(a[3], b[3], c2);
+    let (r4, c4) = adc(a[4], b[4], c3);
+    let (r5, c5) = adc(a[5], b[5], c4);
+    let (r6, c6) = adc(a[6], b[6], c5);
+    let (r7, c7) = adc(a[7], b[7], c6);
+    ([r0, r1, r2, r3, r4, r5, r6, r7], c7)
+  }
+
+  -- 8-byte (64-bit) subtract. Returns the 8-byte difference and final borrow.
+  fn sub8(a: Goldilocks, b: Goldilocks) -> (Goldilocks, G) {
+    let (r0, w0) = sbb(a[0], b[0], 0);
+    let (r1, w1) = sbb(a[1], b[1], w0);
+    let (r2, w2) = sbb(a[2], b[2], w1);
+    let (r3, w3) = sbb(a[3], b[3], w2);
+    let (r4, w4) = sbb(a[4], b[4], w3);
+    let (r5, w5) = sbb(a[5], b[5], w4);
+    let (r6, w6) = sbb(a[6], b[6], w5);
+    let (r7, w7) = sbb(a[7], b[7], w6);
+    ([r0, r1, r2, r3, r4, r5, r6, r7], w7)
+  }
+
+  -- Branchless select on a 0/1 flag: `cond ? x : y` (per byte). `cond` is a
+  -- field element in {0,1}; each result byte equals x[i] or y[i] (both < 256),
+  -- so `u8_from_field_unsafe` is safe.
+  fn sel(cond: G, x: U8, y: U8) -> U8 {
+    u8_from_field_unsafe(to_field(x) * cond + to_field(y) * (1 - cond))
+  }
+  fn select8(cond: G, x: Goldilocks, y: Goldilocks) -> Goldilocks {
+    [sel(cond, x[0], y[0]), sel(cond, x[1], y[1]), sel(cond, x[2], y[2]),
+     sel(cond, x[3], y[3]), sel(cond, x[4], y[4]), sel(cond, x[5], y[5]),
+     sel(cond, x[6], y[6]), sel(cond, x[7], y[7])]
+  }
+
+  -- ==========================================================================
+  -- Base field add / sub (mod p).
+  -- ==========================================================================
+
+  -- a + b mod p. The raw sum is `c·2⁶⁴ + s` with `c ∈ {0,1}`, value `< 2p`:
+  --   • c = 1 ⇒ s < p (since 2⁶⁴ > p), and `2⁶⁴ + s − p = s + EPSILON`
+  --     (a non-overflowing add since the true result is < p);
+  --   • c = 0 ⇒ reduce by one conditional subtraction of p (when s ≥ p).
+  fn gl_add(a: Goldilocks, b: Goldilocks) -> Goldilocks {
+    let (s, c) = add8(a, b);
+    let (s_minus_p, borrow) = sub8(s, gl_p());  -- borrow = 1 iff s < p
+    let (s_plus_eps, _) = add8(s, gl_eps());
+    let c0 = select8(1 - borrow, s_minus_p, s); -- c=0 branch: (s≥p ? s−p : s)
+    select8(c, s_plus_eps, c0)
+  }
+
+  -- a − b mod p. `sub8` borrow = 1 iff a < b, in which case the true result is
+  -- `(a − b mod 2⁶⁴) − EPSILON = a − b + p`.
+  fn gl_sub(a: Goldilocks, b: Goldilocks) -> Goldilocks {
+    let (d, borrow) = sub8(a, b);
+    let (d_minus_eps, _) = sub8(d, gl_eps());
+    select8(borrow, d_minus_eps, d)
+  }
+
+  fn gl_neg(a: Goldilocks) -> Goldilocks { gl_sub(gl_zero(), a) }
+  fn gl_seven() -> Goldilocks { [7u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }
+
+  -- ==========================================================================
+  -- Base field multiply (mod p): schoolbook 64×64 → 128-bit product, then the
+  -- Goldilocks `reduce128` (ported from Plonky3 `goldilocks.rs`).
+  -- ==========================================================================
+
+  -- 8-byte × 1-byte → 9-byte product (`a · m`, with `a < 2⁶⁴`, `m < 2⁸`, so the
+  -- result is `< 2⁷²`). Column `k` is `lo_k + hi_{k-1} + carry`, and the carry
+  -- provably stays in {0,1} (each column sum ≤ 255+255+1 = 511 < 512).
+  fn mul1(a: Goldilocks, m: U8) -> [U8; 9] {
+    let (l0, h0) = u8_mul(a[0], m);
+    let (l1, h1) = u8_mul(a[1], m);
+    let (l2, h2) = u8_mul(a[2], m);
+    let (l3, h3) = u8_mul(a[3], m);
+    let (l4, h4) = u8_mul(a[4], m);
+    let (l5, h5) = u8_mul(a[5], m);
+    let (l6, h6) = u8_mul(a[6], m);
+    let (l7, h7) = u8_mul(a[7], m);
+    let (r1, c1) = adc(l1, h0, 0);
+    let (r2, c2) = adc(l2, h1, c1);
+    let (r3, c3) = adc(l3, h2, c2);
+    let (r4, c4) = adc(l4, h3, c3);
+    let (r5, c5) = adc(l5, h4, c4);
+    let (r6, c6) = adc(l6, h5, c5);
+    let (r7, c7) = adc(l7, h6, c6);
+    let (r8, _)  = adc(h7, 0u8, c7);
+    [l0, r1, r2, r3, r4, r5, r6, r7, r8]
+  }
+
+  -- 16-byte (128-bit) add, carry chain. The final carry is discarded (used only
+  -- to accumulate partial-product rows whose total provably fits in 128 bits).
+  fn add16(x: [U8; 16], y: [U8; 16]) -> [U8; 16] {
+    let (r0,  c0)  = adc(x[0],  y[0],  0);
+    let (r1,  c1)  = adc(x[1],  y[1],  c0);
+    let (r2,  c2)  = adc(x[2],  y[2],  c1);
+    let (r3,  c3)  = adc(x[3],  y[3],  c2);
+    let (r4,  c4)  = adc(x[4],  y[4],  c3);
+    let (r5,  c5)  = adc(x[5],  y[5],  c4);
+    let (r6,  c6)  = adc(x[6],  y[6],  c5);
+    let (r7,  c7)  = adc(x[7],  y[7],  c6);
+    let (r8,  c8)  = adc(x[8],  y[8],  c7);
+    let (r9,  c9)  = adc(x[9],  y[9],  c8);
+    let (r10, c10) = adc(x[10], y[10], c9);
+    let (r11, c11) = adc(x[11], y[11], c10);
+    let (r12, c12) = adc(x[12], y[12], c11);
+    let (r13, c13) = adc(x[13], y[13], c12);
+    let (r14, c14) = adc(x[14], y[14], c13);
+    let (r15, _)   = adc(x[15], y[15], c14);
+    [r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15]
+  }
+
+  -- Full 64×64 → 128-bit product as 16 little-endian bytes. Eight single-byte
+  -- rows `a · b[i]`, each shifted left by `i` bytes and accumulated.
+  fn mul128(a: Goldilocks, b: Goldilocks) -> [U8; 16] {
+    let m0 = mul1(a, b[0]);
+    let m1 = mul1(a, b[1]);
+    let m2 = mul1(a, b[2]);
+    let m3 = mul1(a, b[3]);
+    let m4 = mul1(a, b[4]);
+    let m5 = mul1(a, b[5]);
+    let m6 = mul1(a, b[6]);
+    let m7 = mul1(a, b[7]);
+    let acc = [m0[0], m0[1], m0[2], m0[3], m0[4], m0[5], m0[6], m0[7], m0[8],
+               0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let rb1 = [0u8, m1[0], m1[1], m1[2], m1[3], m1[4], m1[5], m1[6], m1[7], m1[8],
+               0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let acc = add16(acc, rb1);
+    let rb2 = [0u8, 0u8, m2[0], m2[1], m2[2], m2[3], m2[4], m2[5], m2[6], m2[7], m2[8],
+               0u8, 0u8, 0u8, 0u8, 0u8];
+    let acc = add16(acc, rb2);
+    let rb3 = [0u8, 0u8, 0u8, m3[0], m3[1], m3[2], m3[3], m3[4], m3[5], m3[6], m3[7], m3[8],
+               0u8, 0u8, 0u8, 0u8];
+    let acc = add16(acc, rb3);
+    let rb4 = [0u8, 0u8, 0u8, 0u8, m4[0], m4[1], m4[2], m4[3], m4[4], m4[5], m4[6], m4[7], m4[8],
+               0u8, 0u8, 0u8];
+    let acc = add16(acc, rb4);
+    let rb5 = [0u8, 0u8, 0u8, 0u8, 0u8, m5[0], m5[1], m5[2], m5[3], m5[4], m5[5], m5[6], m5[7], m5[8],
+               0u8, 0u8];
+    let acc = add16(acc, rb5);
+    let rb6 = [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, m6[0], m6[1], m6[2], m6[3], m6[4], m6[5], m6[6], m6[7], m6[8],
+               0u8];
+    let acc = add16(acc, rb6);
+    let rb7 = [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, m7[0], m7[1], m7[2], m7[3], m7[4], m7[5], m7[6], m7[7], m7[8]];
+    add16(acc, rb7)
+  }
+
+  -- Reduce a 128-bit value (16 LE bytes) mod p. Port of Plonky3 `reduce128`:
+  -- with `x = x_lo + x_hi·2⁶⁴`, split `x_hi = x_hi_lo + x_hi_hi·2³²`; then
+  -- `2⁶⁴ ≡ EPSILON` and `2⁹⁶ ≡ −1` give `x ≡ x_lo − x_hi_hi + x_hi_lo·EPSILON`.
+  -- A final conditional subtraction of p canonicalizes (Plonky3 keeps a
+  -- non-canonical u64; we need the canonical bytes).
+  fn reduce128(p: [U8; 16]) -> Goldilocks {
+    let xlo = [p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]];
+    let xhl = [p[8], p[9], p[10], p[11], 0u8, 0u8, 0u8, 0u8];   -- low 32 bits of x_hi
+    let xhh = [p[12], p[13], p[14], p[15], 0u8, 0u8, 0u8, 0u8]; -- high 32 bits of x_hi
+    -- t0 = x_lo − x_hi_hi (subtract EPSILON on borrow; cannot underflow).
+    let (t0w, borrow) = sub8(xlo, xhh);
+    let (t0b, _) = sub8(t0w, gl_eps());
+    let t0 = select8(borrow, t0b, t0w);
+    -- t1 = x_hi_lo · EPSILON  (< 2⁶⁴, so only the low 8 product bytes matter).
+    let t1full = mul128(xhl, gl_eps());
+    let t1 = [t1full[0], t1full[1], t1full[2], t1full[3],
+              t1full[4], t1full[5], t1full[6], t1full[7]];
+    -- t2 = t0 + t1  (add EPSILON on overflow; result stays < 2⁶⁴).
+    let (t2w, carry) = add8(t0, t1);
+    let (t2c, _) = add8(t2w, gl_eps());
+    let t2 = select8(carry, t2c, t2w);
+    -- canonicalize: borrow2 = 1 iff t2 < p ⇒ keep t2, else subtract p.
+    let (t2mp, borrow2) = sub8(t2, gl_p());
+    select8(borrow2, t2, t2mp)
+  }
+
+  fn gl_mul(a: Goldilocks, b: Goldilocks) -> Goldilocks { reduce128(mul128(a, b)) }
+  fn gl_sq(a: Goldilocks) -> Goldilocks { gl_mul(a, a) }
+
+  -- ==========================================================================
+  -- Base field inverse / divide via Fermat: a⁻¹ = a^(p−2).
+  -- p − 2 = 2⁶⁴ − 2³² − 1 — in binary: bit 32 is 0, every other bit is 1
+  -- (31 ones, one zero, 32 ones). `gl_run` applies `n` steps of `acc ← acc²·b`.
+  -- ==========================================================================
+  fn gl_run(acc: Goldilocks, base: Goldilocks, n: G) -> Goldilocks {
+    match n {
+      0 => acc,
+      _ => gl_run(gl_mul(gl_sq(acc), base), base, n - 1),
+    }
+  }
+  fn gl_inverse(x: Goldilocks) -> Goldilocks {
+    let acc = gl_run(x, x, 30);  -- bits 63..33: 31 ones (initial acc = x is bit 63)
+    let acc = gl_sq(acc);        -- bit 32: a single 0
+    gl_run(acc, x, 32)           -- bits 31..0: 32 ones
+  }
+  fn gl_div(a: Goldilocks, b: Goldilocks) -> Goldilocks { gl_mul(a, gl_inverse(b)) }
+
+  -- ==========================================================================
+  -- Extension field ExtGoldilocks = 𝔽_p[X]/(X² − 7).
+  -- ==========================================================================
+  fn eg_add(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
+    [gl_add(a[0], b[0]), gl_add(a[1], b[1])]
+  }
+  fn eg_sub(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
+    [gl_sub(a[0], b[0]), gl_sub(a[1], b[1])]
+  }
+  -- (a0 + a1·X)(b0 + b1·X) = (a0·b0 + 7·a1·b1) + (a0·b1 + a1·b0)·X.
+  fn eg_mul(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
+    [gl_add(gl_mul(a[0], b[0]), gl_mul(gl_seven(), gl_mul(a[1], b[1]))),
+     gl_add(gl_mul(a[0], b[1]), gl_mul(a[1], b[0]))]
+  }
+  -- conjugate ā = a0 − a1·X, norm a·ā = a0² − 7·a1² ∈ 𝔽_p, a⁻¹ = ā / norm.
+  fn eg_inverse(a: ExtGoldilocks) -> ExtGoldilocks {
+    let norm = gl_sub(gl_sq(a[0]), gl_mul(gl_seven(), gl_sq(a[1])));
+    let ninv = gl_inverse(norm);
+    [gl_mul(a[0], ninv), gl_mul(gl_neg(a[1]), ninv)]
+  }
+  fn eg_div(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
+    eg_mul(a, eg_inverse(b))
+  }
+
+  -- ==========================================================================
+  -- Self-test (vs `gl_ops_ref`).
+  -- ==========================================================================
+  fn assert_g8(x: Goldilocks, e: Goldilocks) -> G {
+    assert_eq!(to_field(x[0]), to_field(e[0]));
+    assert_eq!(to_field(x[1]), to_field(e[1]));
+    assert_eq!(to_field(x[2]), to_field(e[2]));
+    assert_eq!(to_field(x[3]), to_field(e[3]));
+    assert_eq!(to_field(x[4]), to_field(e[4]));
+    assert_eq!(to_field(x[5]), to_field(e[5]));
+    assert_eq!(to_field(x[6]), to_field(e[6]));
+    assert_eq!(to_field(x[7]), to_field(e[7]));
+    1
+  }
+  pub fn gl_addsub_test() -> G {
+    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
+    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
+    assert_eq!(assert_g8(gl_add(a, b), [255u8, 16u8, 17u8, 17u8, 18u8, 17u8, 17u8, 17u8]), 1);
+    assert_eq!(assert_g8(gl_sub(a, b), [32u8, 83u8, 151u8, 219u8, 31u8, 100u8, 168u8, 236u8]), 1);
+    assert_eq!(assert_g8(gl_sub(b, a), [225u8, 172u8, 104u8, 36u8, 223u8, 155u8, 87u8, 19u8]), 1);
+    -- edge: (p-1) + 5 ≡ 4 ; 5 - (p-1) ≡ 6
+    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8]; -- 0xFFFFFFFF00000000
+    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    assert_eq!(assert_g8(gl_add(pm1, five), [4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
+    assert_eq!(assert_g8(gl_sub(five, pm1), [6u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
+    1
+  }
+
+  fn assert_eg(x: ExtGoldilocks, e0: Goldilocks, e1: Goldilocks) -> G {
+    assert_eq!(assert_g8(x[0], e0), 1);
+    assert_eq!(assert_g8(x[1], e1), 1);
+    1
+  }
+  pub fn gl_muldiv_test() -> G {
+    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
+    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
+    assert_eq!(assert_g8(gl_mul(a, b), [212u8, 186u8, 123u8, 108u8, 31u8, 253u8, 234u8, 250u8]), 1);
+    assert_eq!(assert_g8(gl_inverse(a), [97u8, 29u8, 109u8, 46u8, 183u8, 100u8, 8u8, 102u8]), 1);
+    assert_eq!(assert_g8(gl_div(a, b), [63u8, 59u8, 61u8, 54u8, 46u8, 255u8, 29u8, 186u8]), 1);
+    -- edge: (p-1)·5 ≡ p-5
+    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8];
+    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    assert_eq!(assert_g8(gl_mul(pm1, five), [252u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8]), 1);
+    -- a·a⁻¹ = 1 and b·b⁻¹ = 1
+    assert_eq!(assert_g8(gl_mul(a, gl_inverse(a)), gl_one()), 1);
+    assert_eq!(assert_g8(gl_mul(b, gl_inverse(b)), gl_one()), 1);
+    1
+  }
+  pub fn eg_ops_test() -> G {
+    -- e0 = (0xFEDCBA9876543210, 0x0123456789ABCDEF), e1 = (0x1111111122222222, 0x3333333344444444)
+    let e0 = [[16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8],
+              [239u8, 205u8, 171u8, 137u8, 103u8, 69u8, 35u8, 1u8]];
+    let e1 = [[34u8, 34u8, 34u8, 34u8, 17u8, 17u8, 17u8, 17u8],
+              [68u8, 68u8, 68u8, 68u8, 51u8, 51u8, 51u8, 51u8]];
+    assert_eq!(assert_eg(eg_add(e0, e1),
+      [49u8, 84u8, 118u8, 152u8, 170u8, 203u8, 237u8, 15u8],
+      [51u8, 18u8, 240u8, 205u8, 154u8, 120u8, 86u8, 52u8]), 1);
+    assert_eq!(assert_eg(eg_mul(e0, e1),
+      [10u8, 238u8, 162u8, 36u8, 224u8, 127u8, 182u8, 134u8],
+      [215u8, 234u8, 152u8, 224u8, 219u8, 254u8, 32u8, 67u8]), 1);
+    assert_eq!(assert_eg(eg_inverse(e0),
+      [221u8, 238u8, 29u8, 131u8, 179u8, 89u8, 214u8, 216u8],
+      [114u8, 99u8, 206u8, 108u8, 15u8, 88u8, 161u8, 246u8]), 1);
+    assert_eq!(assert_eg(eg_div(e0, e1),
+      [42u8, 59u8, 64u8, 77u8, 226u8, 214u8, 95u8, 63u8],
+      [200u8, 46u8, 148u8, 147u8, 124u8, 180u8, 248u8, 140u8]), 1);
+    -- e0 · e0⁻¹ = 1
+    assert_eq!(assert_eg(eg_mul(e0, eg_inverse(e0)), gl_one(), gl_zero()), 1);
+    1
+  }
+⟧
+
+end MultiStark
+
+end
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 8162447a..8b47e572 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -146,6 +146,16 @@ def main : IO UInt32 := do
   match vCompiled.bytecode.execute (vCompiled.getFuncIdx `ro_fold_test).get! #[] default with
   | .error e => IO.eprintln s!"✗ ro_fold_test FAILED — {e}"; return 1
   | .ok _ => IO.println "✓ ro_fold_test: open_input reduced-opening math matches reference"
+  -- TEMP: validate non-native Goldilocks byte-arithmetic (vs `gl_ops_ref`).
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `gl_addsub_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ gl_addsub_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ gl_addsub_test: non-native Goldilocks add/sub match reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `gl_muldiv_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ gl_muldiv_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ gl_muldiv_test: non-native Goldilocks mul/inverse/div match reference"
+  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `eg_ops_test).get! #[] default with
+  | .error e => IO.eprintln s!"✗ eg_ops_test FAILED — {e}"; return 1
+  | .ok _ => IO.println "✓ eg_ops_test: non-native ExtGoldilocks add/mul/inverse/div match reference"
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From fe7c15a2dd0c6526f8033f8a2ac165944cf9e8fa Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Mon, 8 Jun 2026 17:49:53 -0300
Subject: [PATCH 23/28] OOD check non-native arithmetic

---
 Ix/MultiStark/Deserialize.lean       |  61 ++------
 Ix/MultiStark/Goldilocks.lean        |  34 +++++
 Ix/MultiStark/Pcs.lean               |  70 +++++----
 Ix/MultiStark/SystemDeserialize.lean |   9 +-
 Ix/MultiStark/Verifier.lean          | 211 ++++++++++++++-------------
 5 files changed, 195 insertions(+), 190 deletions(-)

diff --git a/Ix/MultiStark/Deserialize.lean b/Ix/MultiStark/Deserialize.lean
index a4f69eeb..d814a434 100644
--- a/Ix/MultiStark/Deserialize.lean
+++ b/Ix/MultiStark/Deserialize.lean
@@ -37,9 +37,11 @@ def deserialize := ⟦
   -- ==========================================================================
 
   -- `ExtVal = BinomialExtensionField<Goldilocks, 2> = 𝔽_p[X]/(X² - 7)`, stored
-  -- as its two Goldilocks coefficients `[c0, c1]` (= `c0 + c1·X`). On the wire
-  -- each is a raw `u64` limb; `read_ext` reduces it into a field element.
-  type Ext = [G; 2]
+  -- as its two Goldilocks coefficients `[c0, c1]` (= `c0 + c1·X`). Each coefficient
+  -- is a NON-NATIVE Goldilocks element (`[U8; 8]`, canonical LE bytes) so that the
+  -- composition-polynomial arithmetic is field-agnostic — it emulates Goldilocks
+  -- on bytes instead of using Aiur's native (outer) field. See `Goldilocks.lean`.
+  type Ext = [[U8; 8]; 2]
 
   -- A Merkle digest: `[u64; DIGEST_ELEMS]` with `DIGEST_ELEMS = 4`.
   type Digest = [U64; 4]
@@ -152,60 +154,15 @@ def deserialize := ⟦
       + 0x100000000000000 * to_field(b[7])
   }
 
-  -- `ExtVal` -> two `u64` limbs (no length prefix), reduced to field coefficients.
+  -- `ExtVal` -> two `u64` limbs (no length prefix), each canonicalized into a
+  -- non-native Goldilocks element (`gl_reduce` maps a non-canonical wire repr,
+  -- e.g. `0` shipped as `p`, to its canonical `< p` bytes).
   fn read_ext(stream: ByteStream) -> (Ext, ByteStream) {
     let (a, s0) = read_u64(stream);
     let (b, s1) = read_u64(s0);
-    ([limb_to_field(a), limb_to_field(b)], s1)
+    ([gl_reduce(a), gl_reduce(b)], s1)
   }
 
-  -- Extension-field addition: componentwise.
-  fn ext_add(a: Ext, b: Ext) -> Ext {
-    [a[0] + b[0], a[1] + b[1]]
-  }
-
-  -- Extension-field multiplication in 𝔽_p[X]/(X² - 7):
-  --   (a0 + a1·X)(b0 + b1·X) = (a0·b0 + 7·a1·b1) + (a0·b1 + a1·b0)·X.
-  fn ext_mul(a: Ext, b: Ext) -> Ext {
-    [a[0] * b[0] + 7 * (a[1] * b[1]), a[0] * b[1] + a[1] * b[0]]
-  }
-
-  fn ext_sub(a: Ext, b: Ext) -> Ext {
-    [a[0] - b[0], a[1] - b[1]]
-  }
-
-  fn ext_neg(a: Ext) -> Ext {
-    [0 - a[0], 0 - a[1]]
-  }
-
-  -- Goldilocks (base-field) inverse via Fermat: x^(p-2). Aiur has no field
-  -- division, so we exponentiate. p = 2^64 - 2^32 + 1, so
-  -- p - 2 = 2^64 - 2^32 - 1 — in binary: bit 32 is 0, every other bit is 1.
-  -- `g_run(acc, base, n)` applies n steps of `acc ← acc²·base` (a run of
-  -- set exponent bits). 31 ones, one zero, 32 ones reconstructs p-2.
-  fn g_run(acc: G, base: G, n: G) -> G {
-    match n {
-      0 => acc,
-      _ => g_run(acc * acc * base, base, n - 1),
-    }
-  }
-  fn g_inverse(x: G) -> G {
-    let acc = g_run(x, x, 30);   -- bits 63..33: 31 ones (initial acc=x is bit 63)
-    let acc = acc * acc;          -- bit 32: a single 0
-    g_run(acc, x, 32)             -- bits 31..0: 32 ones
-  }
-
-  -- Extension inverse in 𝔽_p[X]/(X² - 7). For a = a0 + a1·X the conjugate is
-  -- a0 - a1·X and the norm a·ā = a0² - 7·a1² ∈ 𝔽_p, so a⁻¹ = ā / norm.
-  fn ext_inverse(a: Ext) -> Ext {
-    let norm = a[0] * a[0] - 7 * (a[1] * a[1]);
-    let ninv = g_inverse(norm);
-    [a[0] * ninv, (0 - a[1]) * ninv]
-  }
-
-  fn ext_div(a: Ext, b: Ext) -> Ext {
-    ext_mul(a, ext_inverse(b))
-  }
 
   -- Merkle digest -> `[u64; 4]`, no length prefix.
   fn read_digest(stream: ByteStream) -> (Digest, ByteStream) {
diff --git a/Ix/MultiStark/Goldilocks.lean b/Ix/MultiStark/Goldilocks.lean
index 7e1381d4..04959ab0 100644
--- a/Ix/MultiStark/Goldilocks.lean
+++ b/Ix/MultiStark/Goldilocks.lean
@@ -120,6 +120,33 @@ def goldilocks := ⟦
 
   fn gl_neg(a: Goldilocks) -> Goldilocks { gl_sub(gl_zero(), a) }
   fn gl_seven() -> Goldilocks { [7u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }
+  fn gl_two() -> Goldilocks { [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }
+
+  -- Canonicalize an arbitrary 8-byte value (possibly ≥ p, e.g. a raw hash limb
+  -- or a non-canonical wire encoding) to the canonical representative `< p`.
+  -- Any 8-byte value is `< 2⁶⁴ < 2p`, so one conditional subtraction of p
+  -- suffices (borrow = 1 iff x < p ⇒ already canonical).
+  fn gl_reduce(x: Goldilocks) -> Goldilocks {
+    let (xmp, borrow) = sub8(x, gl_p());
+    select8(borrow, x, xmp)
+  }
+
+  -- 1 iff `x` is the field zero (canonicalize first, then test all bytes zero).
+  -- The byte sum is `< 8·256 = 2048`, so `eq_zero` of the sum is field-agnostic.
+  fn gl_is_zero(x: Goldilocks) -> G {
+    let r = gl_reduce(x);
+    eq_zero(to_field(r[0]) + to_field(r[1]) + to_field(r[2]) + to_field(r[3])
+          + to_field(r[4]) + to_field(r[5]) + to_field(r[6]) + to_field(r[7]))
+  }
+
+  -- 1 iff two canonical Goldilocks values are equal (byte-exact). Inputs must be
+  -- canonical (`gl_*` outputs always are); each `to_field` difference is small.
+  fn gl_eq(a: Goldilocks, b: Goldilocks) -> G {
+    eq_zero(to_field(a[0]) - to_field(b[0])) * eq_zero(to_field(a[1]) - to_field(b[1]))
+      * eq_zero(to_field(a[2]) - to_field(b[2])) * eq_zero(to_field(a[3]) - to_field(b[3]))
+      * eq_zero(to_field(a[4]) - to_field(b[4])) * eq_zero(to_field(a[5]) - to_field(b[5]))
+      * eq_zero(to_field(a[6]) - to_field(b[6])) * eq_zero(to_field(a[7]) - to_field(b[7]))
+  }
 
   -- ==========================================================================
   -- Base field multiply (mod p): schoolbook 64×64 → 128-bit product, then the
@@ -262,6 +289,9 @@ def goldilocks := ⟦
   fn eg_sub(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
     [gl_sub(a[0], b[0]), gl_sub(a[1], b[1])]
   }
+  fn eg_neg(a: ExtGoldilocks) -> ExtGoldilocks {
+    [gl_neg(a[0]), gl_neg(a[1])]
+  }
   -- (a0 + a1·X)(b0 + b1·X) = (a0·b0 + 7·a1·b1) + (a0·b1 + a1·b0)·X.
   fn eg_mul(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
     [gl_add(gl_mul(a[0], b[0]), gl_mul(gl_seven(), gl_mul(a[1], b[1]))),
@@ -276,6 +306,10 @@ def goldilocks := ⟦
   fn eg_div(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks {
     eg_mul(a, eg_inverse(b))
   }
+  -- 1 iff two extension elements are equal (both coordinates byte-exact).
+  fn eg_eq(a: ExtGoldilocks, b: ExtGoldilocks) -> G {
+    gl_eq(a[0], b[0]) * gl_eq(a[1], b[1])
+  }
 
   -- ==========================================================================
   -- Self-test (vs `gl_ops_ref`).
diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index 9138f71a..f05f5373 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -269,14 +269,15 @@ def pcs := ⟦
   }
 
   -- base^(Σ bits_i · 2^i), bits LSB-first (square-and-multiply over the bits).
-  fn exp_by_bits(base: G, bits: List‹G›) -> G {
+  -- `base` is a non-native Goldilocks element; `bits` is a native bit list.
+  fn exp_by_bits(base: [U8; 8], bits: List‹G›) -> [U8; 8] {
     match load(bits) {
-      ListNode.Nil => 1,
+      ListNode.Nil => gl_one(),
       ListNode.Cons(b, rest) =>
-        let half = exp_by_bits(base * base, rest);
+        let half = exp_by_bits(gl_sq(base), rest);
         match b {
           0 => half,
-          _ => base * half,
+          _ => gl_mul(base, half),
         },
     }
   }
@@ -286,10 +287,10 @@ def pcs := ⟦
   fn fri_fold2(index_bits: List‹G›, log_height: G, beta: Ext, e0: Ext, e1: Ext) -> Ext {
     let g = two_adic_gen(log_height + 1);
     let s = exp_by_bits(g, glist_rev(index_bits, store(ListNode.Nil)));
-    let two_s = 2 * s;
-    let t1 = ext_div(ext_add(e0, e1), [2, 0]);
-    let t2 = ext_mul(beta, ext_div(ext_sub(e0, e1), [two_s, 0]));
-    ext_add(t1, t2)
+    let two_s = gl_add(s, s);
+    let t1 = eg_div(eg_add(e0, e1), [gl_two(), gl_zero()]);
+    let t2 = eg_mul(beta, eg_div(eg_sub(e0, e1), [two_s, gl_zero()]));
+    eg_add(t1, t2)
   }
 
   -- Self-test: arity-2 fold at index 5, log_height 3 vs the `fri_fold_ref`
@@ -297,12 +298,15 @@ def pcs := ⟦
   pub fn fri_fold_test() -> G {
     let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
                        store(ListNode.Cons(1, store(ListNode.Nil)))))));
-    let e0 = [0x1111111111111111, 0x2222222222222222];
-    let e1 = [0x3333333333333333, 0x4444444444444444];
-    let beta = [0x5555555555555555, 0x6666666666666666];
+    let e0 = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
+              [34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8]];
+    let e1 = [[51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8],
+              [68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8]];
+    let beta = [[85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8],
+                [102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8]];
     let folded = fri_fold2(index_bits, 3, beta, e0, e1);
-    assert_eq!(folded[0], 9349172584842537206);
-    assert_eq!(folded[1], 984486879173118962);
+    assert_eq!(limb_to_field(folded[0]), 9349172584842537206);
+    assert_eq!(limb_to_field(folded[1]), 984486879173118962);
     1
   }
 
@@ -321,19 +325,19 @@ def pcs := ⟦
 
   -- The base-field query domain point x. `index_bits` = low-`log_height` index
   -- bits, LSB first (so reverse_bits_len = reversing the list).
-  fn ro_x(index_bits: List‹G›, log_height: G) -> G {
-    7 * exp_by_bits(two_adic_gen(log_height), glist_rev(index_bits, store(ListNode.Nil)))
+  fn ro_x(index_bits: List‹G›, log_height: G) -> [U8; 8] {
+    gl_mul(gl_seven(), exp_by_bits(two_adic_gen(log_height), glist_rev(index_bits, store(ListNode.Nil))))
   }
 
   -- Accumulate one matrix-point's column contributions. `q = 1/(z − x)`.
-  fn ro_fold(p_x: List‹G›, p_z: List‹Ext›, q: Ext, alpha: Ext, ro: Ext, ap: Ext)
+  fn ro_fold(p_x: List‹[U8; 8]›, p_z: List‹Ext›, q: Ext, alpha: Ext, ro: Ext, ap: Ext)
       -> (Ext, Ext) {
     match load(p_x) {
       ListNode.Nil => (ro, ap),
       ListNode.Cons(px, pxr) =>
         let &ListNode.Cons(pz, pzr) = p_z;
-        let term = ext_mul(ext_mul(ap, ext_sub(pz, [px, 0])), q);
-        ro_fold(pxr, pzr, q, alpha, ext_add(ro, term), ext_mul(ap, alpha)),
+        let term = eg_mul(eg_mul(ap, eg_sub(pz, [px, gl_zero()])), q);
+        ro_fold(pxr, pzr, q, alpha, eg_add(ro, term), eg_mul(ap, alpha)),
     }
   }
 
@@ -343,17 +347,25 @@ def pcs := ⟦
     let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
                        store(ListNode.Cons(1, store(ListNode.Nil)))))));
     let x = ro_x(index_bits, 3);
-    assert_eq!(x, 117440512);
-    let z = [0x123456789a, 0xabcdef01];
-    let alpha = [0x1111111111111111, 0x2];
-    let p_x = store(ListNode.Cons(11, store(ListNode.Cons(22,
-                store(ListNode.Cons(33, store(ListNode.Nil)))))));
-    let p_z = store(ListNode.Cons([100, 1], store(ListNode.Cons([200, 2],
-                store(ListNode.Cons([300, 3], store(ListNode.Nil)))))));
-    let q = ext_inverse(ext_sub(z, [x, 0]));
-    let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [0, 0], [1, 0]);
-    assert_eq!(ro[0], 7130765474285082575);
-    assert_eq!(ro[1], 12254464995725315436);
+    assert_eq!(limb_to_field(x), 117440512);
+    let z = [[154u8, 120u8, 86u8, 52u8, 18u8, 0u8, 0u8, 0u8],
+             [1u8, 239u8, 205u8, 171u8, 0u8, 0u8, 0u8, 0u8]];
+    let alpha = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
+                 [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let px0 = [11u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let px1 = [22u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let px2 = [33u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let p_x = store(ListNode.Cons(px0, store(ListNode.Cons(px1,
+                store(ListNode.Cons(px2, store(ListNode.Nil)))))));
+    let pz0 = [[100u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let pz1 = [[200u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let pz2 = [[44u8, 1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let p_z = store(ListNode.Cons(pz0, store(ListNode.Cons(pz1,
+                store(ListNode.Cons(pz2, store(ListNode.Nil)))))));
+    let q = eg_inverse(eg_sub(z, [x, gl_zero()]));
+    let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [gl_zero(), gl_zero()], [gl_one(), gl_zero()]);
+    assert_eq!(limb_to_field(ro[0]), 7130765474285082575);
+    assert_eq!(limb_to_field(ro[1]), 12254464995725315436);
     1
   }
 
diff --git a/Ix/MultiStark/SystemDeserialize.lean b/Ix/MultiStark/SystemDeserialize.lean
index 6c37ce04..fbbbc046 100644
--- a/Ix/MultiStark/SystemDeserialize.lean
+++ b/Ix/MultiStark/SystemDeserialize.lean
@@ -50,7 +50,7 @@ def systemDeserialize := ⟦
     IsFirstRow,
     IsLastRow,
     IsTransition,
-    Const(G),
+    Const([U8; 8]),             -- a non-native Goldilocks constant (canonical LE bytes)
     Add(&SymExpr, &SymExpr, G),
     Sub(&SymExpr, &SymExpr, G),
     Neg(&SymExpr, G),
@@ -99,10 +99,11 @@ def systemDeserialize := ⟦
        + 0x1000000 * to_field(b3), s3)
   }
 
-  -- A raw `u64` Goldilocks value, reduced mod p (for `Constant`).
-  fn read_field(stream: ByteStream) -> (G, ByteStream) {
+  -- A raw `u64` Goldilocks constant, canonicalized into non-native Goldilocks
+  -- bytes (`gl_reduce`) so it can feed the byte-emulated composition arithmetic.
+  fn read_field(stream: ByteStream) -> ([U8; 8], ByteStream) {
     let (u, s) = read_u64(stream);
-    (limb_to_field(u), s)
+    (gl_reduce(u), s)
   }
 
   -- ==========================================================================
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index 088d4a69..f9c067a5 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -64,7 +64,7 @@ def verifier := ⟦
   -- An extension element `[c0, c1]` (`= c0 + c1·X`) is zero iff both Goldilocks
   -- coefficients are zero. (`read_ext` already reduced the limbs mod p.)
   fn ext_is_zero(e: Ext) -> G {
-    eq_zero(e[0]) * eq_zero(e[1])
+    gl_is_zero(e[0]) * gl_is_zero(e[1])
   }
 
   -- 1 iff the LAST element of the accumulator list is the zero extension
@@ -244,7 +244,7 @@ def verifier := ⟦
   fn pcs_sample_ext(input: ByteStream, output: ByteStream)
       -> (Ext, ByteStream, ByteStream) {
     let (c0, c1, i1, o1) = ch_sample_ext(input, output);
-    ([limb_to_field(c0), limb_to_field(c1)], i1, o1)
+    ([gl_reduce(c0), gl_reduce(c1)], i1, o1)
   }
 
   -- Self-test: replay the synthetic PCS challenger sequence from
@@ -259,16 +259,16 @@ def verifier := ⟦
     -- α_pcs (output empty ⇒ flush), then α_fri (CONSECUTIVE ⇒ thread output).
     let (apcs, input, o1) = pcs_sample_ext(input, store(ListNode.Nil));
     let (afri, input, o2) = pcs_sample_ext(input, o1);
-    assert_eq!(apcs[0], 2882912772410685996);
-    assert_eq!(apcs[1], 910933442133595775);
-    assert_eq!(afri[0], 14440140149289897216);
-    assert_eq!(afri[1], 8092267645441512944);
+    assert_eq!(limb_to_field(apcs[0]), 2882912772410685996);
+    assert_eq!(limb_to_field(apcs[1]), 910933442133595775);
+    assert_eq!(limb_to_field(afri[0]), 14440140149289897216);
+    assert_eq!(limb_to_field(afri[1]), 8092267645441512944);
     -- observe commit (clears output), sample β.
     let v2 = [239u8, 190u8, 173u8, 222u8, 0u8, 0u8, 0u8, 0u8]; -- 0x00000000deadbeef
     let (input, _oc) = ch_observe_val(input, v2);
     let (beta, input, _ob) = pcs_sample_ext(input, store(ListNode.Nil));
-    assert_eq!(beta[0], 10456048119516576995);
-    assert_eq!(beta[1], 3173538015651228593);
+    assert_eq!(limb_to_field(beta[0]), 10456048119516576995);
+    assert_eq!(limb_to_field(beta[1]), 3173538015651228593);
     -- observe final_poly coeff + log_arity (each a Val), then sample the index.
     let v3 = [4u8, 3u8, 2u8, 1u8, 13u8, 12u8, 11u8, 10u8]; -- 0x0a0b0c0d01020304
     let v4 = [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];     -- 0x0000000000000002
@@ -342,10 +342,10 @@ def verifier := ⟦
     let input = snoc_cap(input, q);
     -- sample out-of-domain point ζ; keep the resulting `input` for the PCS phase
     let (z0, z1, zinput, _oz) = ch_sample_ext(input, store(ListNode.Nil));
-    ([limb_to_field(l0), limb_to_field(l1)],
-     [limb_to_field(f0), limb_to_field(f1)],
-     [limb_to_field(a0), limb_to_field(a1)],
-     [limb_to_field(z0), limb_to_field(z1)],
+    ([gl_reduce(l0), gl_reduce(l1)],
+     [gl_reduce(f0), gl_reduce(f1)],
+     [gl_reduce(a0), gl_reduce(a1)],
+     [gl_reduce(z0), gl_reduce(z1)],
      zinput)
   }
 
@@ -359,54 +359,54 @@ def verifier := ⟦
   fn ext_exp_pow2(e: Ext, k: G) -> Ext {
     match k {
       0 => e,
-      _ => ext_exp_pow2(ext_mul(e, e), k - 1),
+      _ => ext_exp_pow2(eg_mul(e, e), k - 1),
     }
   }
 
   -- `two_adic_generator(bits)` — a primitive 2^bits root of unity in Goldilocks
   -- (`Plonky3/goldilocks/src/goldilocks.rs::TWO_ADIC_GENERATORS`).
-  fn two_adic_gen(bits: G) -> G {
+  fn two_adic_gen(bits: G) -> [U8; 8] {
     match bits {
-      0  => 0x0000000000000001,
-      1  => 0xffffffff00000000,
-      2  => 0x0001000000000000,
-      3  => 0xfffffffeff000001,
-      4  => 0xefffffff00000001,
-      5  => 0x00003fffffffc000,
-      6  => 0x0000008000000000,
-      7  => 0xf80007ff08000001,
-      8  => 0xbf79143ce60ca966,
-      9  => 0x1905d02a5c411f4e,
-      10 => 0x9d8f2ad78bfed972,
-      11 => 0x0653b4801da1c8cf,
-      12 => 0xf2c35199959dfcb6,
-      13 => 0x1544ef2335d17997,
-      14 => 0xe0ee099310bba1e2,
-      15 => 0xf6b2cffe2306baac,
-      16 => 0x54df9630bf79450e,
-      17 => 0xabd0a6e8aa3d8a0e,
-      18 => 0x81281a7b05f9beac,
-      19 => 0xfbd41c6b8caa3302,
-      20 => 0x30ba2ecd5e93e76d,
-      21 => 0xf502aef532322654,
-      22 => 0x4b2a18ade67246b5,
-      23 => 0xea9d5a1336fbc98b,
-      24 => 0x86cdcc31c307e171,
-      25 => 0x4bbaf5976ecfefd8,
-      26 => 0xed41d05b78d6e286,
-      27 => 0x10d78dd8915a171d,
-      28 => 0x59049500004a4485,
-      29 => 0xdfa8c93ba46d2666,
-      30 => 0x7e9bd009b86a0845,
-      31 => 0x400a7f755588e659,
-      _  => 0x185629dcda58878c,
+      0  => [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8],
+      1  => [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8],
+      2  => [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8],
+      3  => [1u8, 0u8, 0u8, 255u8, 254u8, 255u8, 255u8, 255u8],
+      4  => [1u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 239u8],
+      5  => [0u8, 192u8, 255u8, 255u8, 255u8, 63u8, 0u8, 0u8],
+      6  => [0u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8],
+      7  => [1u8, 0u8, 0u8, 8u8, 255u8, 7u8, 0u8, 248u8],
+      8  => [102u8, 169u8, 12u8, 230u8, 60u8, 20u8, 121u8, 191u8],
+      9  => [78u8, 31u8, 65u8, 92u8, 42u8, 208u8, 5u8, 25u8],
+      10 => [114u8, 217u8, 254u8, 139u8, 215u8, 42u8, 143u8, 157u8],
+      11 => [207u8, 200u8, 161u8, 29u8, 128u8, 180u8, 83u8, 6u8],
+      12 => [182u8, 252u8, 157u8, 149u8, 153u8, 81u8, 195u8, 242u8],
+      13 => [151u8, 121u8, 209u8, 53u8, 35u8, 239u8, 68u8, 21u8],
+      14 => [226u8, 161u8, 187u8, 16u8, 147u8, 9u8, 238u8, 224u8],
+      15 => [172u8, 186u8, 6u8, 35u8, 254u8, 207u8, 178u8, 246u8],
+      16 => [14u8, 69u8, 121u8, 191u8, 48u8, 150u8, 223u8, 84u8],
+      17 => [14u8, 138u8, 61u8, 170u8, 232u8, 166u8, 208u8, 171u8],
+      18 => [172u8, 190u8, 249u8, 5u8, 123u8, 26u8, 40u8, 129u8],
+      19 => [2u8, 51u8, 170u8, 140u8, 107u8, 28u8, 212u8, 251u8],
+      20 => [109u8, 231u8, 147u8, 94u8, 205u8, 46u8, 186u8, 48u8],
+      21 => [84u8, 38u8, 50u8, 50u8, 245u8, 174u8, 2u8, 245u8],
+      22 => [181u8, 70u8, 114u8, 230u8, 173u8, 24u8, 42u8, 75u8],
+      23 => [139u8, 201u8, 251u8, 54u8, 19u8, 90u8, 157u8, 234u8],
+      24 => [113u8, 225u8, 7u8, 195u8, 49u8, 204u8, 205u8, 134u8],
+      25 => [216u8, 239u8, 207u8, 110u8, 151u8, 245u8, 186u8, 75u8],
+      26 => [134u8, 226u8, 214u8, 120u8, 91u8, 208u8, 65u8, 237u8],
+      27 => [29u8, 23u8, 90u8, 145u8, 216u8, 141u8, 215u8, 16u8],
+      28 => [133u8, 68u8, 74u8, 0u8, 0u8, 149u8, 4u8, 89u8],
+      29 => [102u8, 38u8, 109u8, 164u8, 59u8, 201u8, 168u8, 223u8],
+      30 => [69u8, 8u8, 106u8, 184u8, 9u8, 208u8, 155u8, 126u8],
+      31 => [89u8, 230u8, 136u8, 85u8, 117u8, 127u8, 10u8, 64u8],
+      _  => [140u8, 135u8, 88u8, 218u8, 220u8, 41u8, 86u8, 24u8],
     }
   }
 
   -- Vanishing polynomial of the trace domain (shift = 1, size 2^L) at point ζ:
   -- `Z_H(ζ) = ζ^(2^L) - 1`.
   fn trace_vanishing(zeta: Ext, l: G) -> Ext {
-    ext_sub(ext_exp_pow2(zeta, l), [1, 0])
+    eg_sub(ext_exp_pow2(zeta, l), [gl_one(), gl_zero()])
   }
 
   -- Lagrange selectors at ζ for the trace domain (shift = 1), mirroring
@@ -418,11 +418,11 @@ def verifier := ⟦
   -- where g = two_adic_gen(L) is the subgroup generator.
   fn trace_selectors(zeta: Ext, l: G) -> (Ext, Ext, Ext, Ext) {
     let zh = trace_vanishing(zeta, l);
-    let ginv = g_inverse(two_adic_gen(l));
-    let is_first = ext_div(zh, ext_sub(zeta, [1, 0]));
-    let is_last = ext_div(zh, ext_sub(zeta, [ginv, 0]));
-    let is_trans = ext_sub(zeta, [ginv, 0]);
-    let inv_van = ext_inverse(zh);
+    let ginv = gl_inverse(two_adic_gen(l));
+    let is_first = eg_div(zh, eg_sub(zeta, [gl_one(), gl_zero()]));
+    let is_last = eg_div(zh, eg_sub(zeta, [ginv, gl_zero()]));
+    let is_trans = eg_sub(zeta, [ginv, gl_zero()]);
+    let inv_van = eg_inverse(zh);
     (is_first, is_last, is_trans, inv_van)
   }
 
@@ -473,13 +473,13 @@ def verifier := ⟦
   -- One Horner fold step of the constraint folder: `acc := acc·α + x`
   -- (`VerifierConstraintFolder::assert_zero` / `assert_zero_ext`).
   fn ood_fold(acc: Ext, alpha: Ext, x: Ext) -> Ext {
-    ext_add(ext_mul(acc, alpha), x)
+    eg_add(eg_mul(acc, alpha), x)
   }
 
   -- Reconstruct an extension element from its two opened base coordinates,
   -- `from_ext_basis([c0, c1]) = c0 + c1·X` (the ExtVal basis is `[1, X]`).
   fn from_ext_basis(c0: Ext, c1: Ext) -> Ext {
-    ext_add(c0, ext_mul(c1, [0, 1]))
+    eg_add(c0, eg_mul(c1, [gl_zero(), gl_one()]))
   }
 
   -- A stage-2 / quotient opened row arrives as `stage_2_width·2` extension
@@ -504,19 +504,19 @@ def verifier := ⟦
         match entry {
           SysEntry.Main(_o) => list_lookup(main, idx),
           SysEntry.Preprocessed(_o) => list_lookup(prep, idx),
-          SysEntry.Stage2(_o) => [0, 0],
-          SysEntry.Public => [0, 0],
-          SysEntry.Stage2Public => [0, 0],
-          SysEntry.Challenge => [0, 0],
+          SysEntry.Stage2(_o) => [gl_zero(), gl_zero()],
+          SysEntry.Public => [gl_zero(), gl_zero()],
+          SysEntry.Stage2Public => [gl_zero(), gl_zero()],
+          SysEntry.Challenge => [gl_zero(), gl_zero()],
         },
-      SymExpr.IsFirstRow => [0, 0],
-      SymExpr.IsLastRow => [0, 0],
-      SymExpr.IsTransition => [0, 0],
-      SymExpr.Const(c) => [c, 0],
-      SymExpr.Add(x, y, _d) => ext_add(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
-      SymExpr.Sub(x, y, _d) => ext_sub(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
-      SymExpr.Neg(x, _d) => ext_neg(eval_sym(load(x), main, prep)),
-      SymExpr.Mul(x, y, _d) => ext_mul(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+      SymExpr.IsFirstRow => [gl_zero(), gl_zero()],
+      SymExpr.IsLastRow => [gl_zero(), gl_zero()],
+      SymExpr.IsTransition => [gl_zero(), gl_zero()],
+      SymExpr.Const(c) => [c, gl_zero()],
+      SymExpr.Add(x, y, _d) => eg_add(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+      SymExpr.Sub(x, y, _d) => eg_sub(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
+      SymExpr.Neg(x, _d) => eg_neg(eval_sym(load(x), main, prep)),
+      SymExpr.Mul(x, y, _d) => eg_mul(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)),
     }
   }
 
@@ -524,9 +524,9 @@ def verifier := ⟦
   -- `lookup.rs::fingerprint`).
   fn fingerprint_ext(r: Ext, args: List‹SymExpr›, main: List‹Ext›, prep: List‹Ext›) -> Ext {
     match load(args) {
-      ListNode.Nil => [0, 0],
+      ListNode.Nil => [gl_zero(), gl_zero()],
       ListNode.Cons(a, rest) =>
-        ext_add(eval_sym(a, main, prep), ext_mul(r, fingerprint_ext(r, rest, main, prep))),
+        eg_add(eval_sym(a, main, prep), eg_mul(r, fingerprint_ext(r, rest, main, prep))),
     }
   }
 
@@ -547,7 +547,7 @@ def verifier := ⟦
       0 => acc,
       _ =>
         let x = list_lookup(main, idx);
-        let bc = ext_mul(x, ext_sub(x, [1, 0]));
+        let bc = eg_mul(x, eg_sub(x, [gl_one(), gl_zero()]));
         fold_sel_bools(ood_fold(acc, alpha, bc), alpha, main, idx + 1, count - 1),
     }
   }
@@ -568,10 +568,10 @@ def verifier := ⟦
         let minv = list_lookup(s2row, k + 1);
         let mult = eval_sym(mult_e, main, prep);
         let fp = fingerprint_ext(fch, args, main, prep);
-        let message = ext_add(lch, fp);
-        let c = ext_sub(ext_mul(message, minv), [1, 0]);
+        let message = eg_add(lch, fp);
+        let c = eg_sub(eg_mul(message, minv), [gl_one(), gl_zero()]);
         let acc = ood_fold(acc, alpha, c);
-        let acc_expr = ext_add(acc_expr, ext_mul(mult, minv));
+        let acc_expr = eg_add(acc_expr, eg_mul(mult, minv));
         fold_lookups(acc, alpha, rest, k + 1, main, prep, s2row, lch, fch, acc_expr),
     }
   }
@@ -590,7 +590,7 @@ def verifier := ⟦
     match air {
       SysAir.Function(c) =>
         let SysConstraints.Mk(zeros, ss, se, _w) = c;
-        let acc = fold_zeros([0, 0], alpha, zeros, main);
+        let acc = fold_zeros([gl_zero(), gl_zero()], alpha, zeros, main);
         let acc = fold_sel_bools(acc, alpha, main, ss, se - ss);
         ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
       SysAir.Memory(m) =>
@@ -601,18 +601,18 @@ def verifier := ⟦
         let is_real_next = list_lookup(main_next, 1);
         let ptr_next = list_lookup(main_next, 2);
         -- assert_bool(is_real)
-        let acc = ood_fold([0, 0], alpha, ext_mul(is_real, ext_sub(is_real, [1, 0])));
+        let acc = ood_fold([gl_zero(), gl_zero()], alpha, eg_mul(is_real, eg_sub(is_real, [gl_one(), gl_zero()])));
         -- is_real_transition = is_real_next · is_transition
-        let irt = ext_mul(is_real_next, ist);
+        let irt = eg_mul(is_real_next, ist);
         -- when(irt).assert_one(is_real) = irt·(is_real - 1)
-        let acc = ood_fold(acc, alpha, ext_mul(irt, ext_sub(is_real, [1, 0])));
+        let acc = ood_fold(acc, alpha, eg_mul(irt, eg_sub(is_real, [gl_one(), gl_zero()])));
         -- when(irt).assert_eq(ptr+1, ptr_next) = irt·(ptr + 1 - ptr_next)
-        let acc = ood_fold(acc, alpha, ext_mul(irt, ext_sub(ext_add(ptr, [1, 0]), ptr_next)));
+        let acc = ood_fold(acc, alpha, eg_mul(irt, eg_sub(eg_add(ptr, [gl_one(), gl_zero()]), ptr_next)));
         ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
       SysAir.Bytes1 =>
-        ood_comp_tail([0, 0], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+        ood_comp_tail([gl_zero(), gl_zero()], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
       SysAir.Bytes2 =>
-        ood_comp_tail([0, 0], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
+        ood_comp_tail([gl_zero(), gl_zero()], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha),
     }
   }
 
@@ -626,11 +626,11 @@ def verifier := ⟦
     let acc_col = list_lookup(s2row, 0);
     let next_acc_col = list_lookup(s2next, 0);
     -- when_first_row: acc_col = accp
-    let acc = ood_fold(acc, alpha, ext_mul(isf, ext_sub(acc_col, accp)));
+    let acc = ood_fold(acc, alpha, eg_mul(isf, eg_sub(acc_col, accp)));
     -- when_transition: acc_expr = next_acc_col
-    let acc = ood_fold(acc, alpha, ext_mul(ist, ext_sub(acc_expr, next_acc_col)));
+    let acc = ood_fold(acc, alpha, eg_mul(ist, eg_sub(acc_expr, next_acc_col)));
     -- when_last_row: acc_expr = naccp
-    ood_fold(acc, alpha, ext_mul(isl, ext_sub(acc_expr, naccp)))
+    ood_fold(acc, alpha, eg_mul(isl, eg_sub(acc_expr, naccp)))
   }
 
   -- ==========================================================================
@@ -645,50 +645,51 @@ def verifier := ⟦
   -- with `Z_{Dₖ}(x) = (x · shift_k⁻¹)^(2^L) - 1`.
   -- ==========================================================================
 
-  -- base-field power `base^e` (e small: the chunk index, < qd).
-  fn g_pow(base: G, e: G) -> G {
+  -- base-field power `base^e` (e small: the chunk index, < qd). `base` is a
+  -- non-native Goldilocks element; `e` is a native loop counter.
+  fn g_pow(base: [U8; 8], e: G) -> [U8; 8] {
     match e {
-      0 => 1,
-      _ => base * g_pow(base, e - 1),
+      0 => gl_one(),
+      _ => gl_mul(base, g_pow(base, e - 1)),
     }
   }
 
   -- `Z_{Dⱼ}(x) = (x · shift_j⁻¹)^(2^L) - 1`, evaluated at extension point `x`.
-  fn vanish_chunk(x: Ext, l: G, shiftinv: G) -> Ext {
-    ext_sub(ext_exp_pow2(ext_mul(x, [shiftinv, 0]), l), [1, 0])
+  fn vanish_chunk(x: Ext, l: G, shiftinv: [U8; 8]) -> Ext {
+    eg_sub(ext_exp_pow2(eg_mul(x, [shiftinv, gl_zero()]), l), [gl_one(), gl_zero()])
   }
 
   -- `zpsₜ = Πⱼ≠ₜ Z_{Dⱼ}(ζ) / Z_{Dⱼ}(shift_t)`. Iterates j over `[jidx, jidx+rem)`.
-  fn zps_prod(acc: Ext, zeta: Ext, l: G, g_q: G, shift_t: G, jidx: G, rem: G, t: G) -> Ext {
+  fn zps_prod(acc: Ext, zeta: Ext, l: G, g_q: [U8; 8], shift_t: [U8; 8], jidx: G, rem: G, t: G) -> Ext {
     match rem {
       0 => acc,
       _ =>
-        let shiftinv = g_inverse(7 * g_pow(g_q, jidx));
+        let shiftinv = gl_inverse(gl_mul(gl_seven(), g_pow(g_q, jidx)));
         -- skip the j = t factor (the chunk's own domain); branch in tail
         -- position so the inner match is not a non-tail match.
         match eq_zero(jidx - t) {
           1 => zps_prod(acc, zeta, l, g_q, shift_t, jidx + 1, rem - 1, t),
           _ =>
-            let factor = ext_mul(vanish_chunk(zeta, l, shiftinv),
-                                 ext_inverse(vanish_chunk([shift_t, 0], l, shiftinv)));
-            zps_prod(ext_mul(acc, factor), zeta, l, g_q, shift_t, jidx + 1, rem - 1, t),
+            let factor = eg_mul(vanish_chunk(zeta, l, shiftinv),
+                                 eg_inverse(vanish_chunk([shift_t, gl_zero()], l, shiftinv)));
+            zps_prod(eg_mul(acc, factor), zeta, l, g_q, shift_t, jidx + 1, rem - 1, t),
         },
     }
   }
 
   -- `quotient(ζ) = Σₜ zpsₜ · from_ext_basis(chunkₜ)`, iterating the `qd` chunks
   -- (`q_opened[idx][0] = [c0, c1]`).
-  fn quotient_sum(acc: Ext, zeta: Ext, l: G, qd: G, g_q: G,
+  fn quotient_sum(acc: Ext, zeta: Ext, l: G, qd: G, g_q: [U8; 8],
       q_opened: OpenedRound, idx: G, rem: G, t: G) -> Ext {
     match rem {
       0 => acc,
       _ =>
-        let shift_t = 7 * g_pow(g_q, t);
-        let zps_t = zps_prod([1, 0], zeta, l, g_q, shift_t, 0, qd, t);
+        let shift_t = gl_mul(gl_seven(), g_pow(g_q, t));
+        let zps_t = zps_prod([gl_one(), gl_zero()], zeta, l, g_q, shift_t, 0, qd, t);
         let ch = list_lookup(q_opened, idx);
         let row = list_lookup(ch, 0);
         let qv = from_ext_basis(list_lookup(row, 0), list_lookup(row, 1));
-        quotient_sum(ext_add(acc, ext_mul(zps_t, qv)), zeta, l, qd, g_q,
+        quotient_sum(eg_add(acc, eg_mul(zps_t, qv)), zeta, l, qd, g_q,
                      q_opened, idx + 1, rem - 1, t + 1),
     }
   }
@@ -760,8 +761,8 @@ def verifier := ⟦
         let comp = ood_composition(air, lookups, main, main_next, s2row, s2next,
                                    prep, isf, isl, ist, lch, fch, accp, naccp, alpha);
         let g_q = two_adic_gen(l + log_qd);
-        let quot = quotient_sum([0, 0], zeta, l, qd, g_q, q_opened, lastq, qd, 0);
-        assert_eq!(ext_mul(comp, invv), quot);
+        let quot = quotient_sum([gl_zero(), gl_zero()], zeta, l, qd, g_q, q_opened, lastq, qd, 0);
+        assert_eq!(eg_eq(eg_mul(comp, invv), quot), 1);
         ood_loop(rest, prep_indices, log_degrees, accs, stage1, stage2, prep_opt,
                  q_opened, i + 1, naccp, lastq + qd, lch, fch, alpha, zeta),
     }
@@ -771,9 +772,9 @@ def verifier := ⟦
   -- its raw u64 limb to an extension element). Mirrors `lookup::fingerprint`.
   fn fingerprint_vals(fch: Ext, vals: List‹U64›) -> Ext {
     match load(vals) {
-      ListNode.Nil => [0, 0],
+      ListNode.Nil => [gl_zero(), gl_zero()],
       ListNode.Cons(v, rest) =>
-        ext_add([limb_to_field(v), 0], ext_mul(fch, fingerprint_vals(fch, rest))),
+        eg_add([gl_reduce(v), gl_zero()], eg_mul(fch, fingerprint_vals(fch, rest))),
     }
   }
 
@@ -784,8 +785,8 @@ def verifier := ⟦
     match load(claims) {
       ListNode.Nil => acc,
       ListNode.Cons(c, rest) =>
-        let msg = ext_add(lch, fingerprint_vals(fch, c));
-        claims_acc(ext_add(acc, ext_inverse(msg)), rest, lch, fch),
+        let msg = eg_add(lch, fingerprint_vals(fch, c));
+        claims_acc(eg_add(acc, eg_inverse(msg)), rest, lch, fch),
     }
   }
 
@@ -802,7 +803,7 @@ def verifier := ⟦
         let Commitments.Mk(s1c, s2c, qc) = commitments;
         let prep_cap = opt_commit_cap(commit);
         let (lch, fch, alpha, zeta, _post_zeta_input) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
-        let acc0 = claims_acc([0, 0], claims, lch, fch);
+        let acc0 = claims_acc([gl_zero(), gl_zero()], claims, lch, fch);
         ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
                  prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta),
     }

From 4832b74868f230db411cd634dcf1f0807bfecc8b Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Tue, 9 Jun 2026 19:12:28 -0300
Subject: [PATCH 24/28] PCS verifier finished

---
 Ix/MultiStark.lean          |   4 +-
 Ix/MultiStark/Pcs.lean      | 447 +++++++++++++++++++++++++++++++++++-
 Ix/MultiStark/Verifier.lean |  29 ++-
 RecursiveVerifier.lean      |  34 +--
 4 files changed, 483 insertions(+), 31 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index b7612877..5a534bd5 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -32,7 +32,7 @@ def entrypoints := ⟦
   -- deserialize into a `Proof` object (asserting the whole stream was consumed),
   -- then recompute keccak-256 over the same bytes and assert it equals `digest`
   -- — binding the IO-fed bytes to the public commitment.
-  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4]) {
+  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4], num_queries: G, commit_pow_bits: G) {
     -- Proof from IO key [0]: deserialize, assert fully consumed, and bind the
     -- bytes to the public keccak-256 `digest`.
     let (idx, len) = io_get_info([0]);
@@ -60,7 +60,7 @@ def entrypoints := ⟦
     assert_eq!(verify(proof), 1);
     -- Step 3 + 5: prover-faithful Fiat-Shamir replay and the out-of-domain
     -- composition/quotient check, `composition(ζ)·inv_vanishing(ζ) == quotient(ζ)`.
-    assert_eq!(ood_verify(sys, proof, claims), 1);
+    assert_eq!(ood_verify(sys, proof, claims, num_queries, commit_pow_bits), 1);
     ()
   }
 ⟧
diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index f05f5373..2bab129d 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -196,9 +196,19 @@ def pcs := ⟦
     }
   }
 
+  -- Canonicalize each lane: the MMCS leaf hash serializes `as_canonical_u64`,
+  -- but opened base values are on the wire in the (possibly non-canonical)
+  -- internal Goldilocks repr — e.g. field zero ships as `p`. `gl_reduce` maps
+  -- them to `< p` before hashing (idempotent on already-canonical lanes).
+  fn canon_lanes(l: List‹U64›) -> List‹U64› {
+    match load(l) {
+      ListNode.Nil => store(ListNode.Nil),
+      ListNode.Cons(x, rest) => store(ListNode.Cons(gl_reduce(x), canon_lanes(rest))),
+    }
+  }
   -- The joint leaf hash of all matrices at log-height `target`.
   fn leaf_hash_at(rows: List‹List‹U64››, lhs: List‹G›, target: G) -> Digest {
-    pf_sponge_u64(concat_at(rows, lhs, target))
+    pf_sponge_u64(canon_lanes(concat_at(rows, lhs, target)))
   }
 
   -- Inject the leaf hash of any matrices at log-height `lh` (if present) via a
@@ -370,15 +380,436 @@ def pcs := ⟦
   }
 
   -- ==========================================================================
-  -- PCS verification entry (STILL A STUB).
-  -- Accepts any FRI opening proof. A real implementation will check query
-  -- openings, Merkle paths (via `mmcs_*` above) and the FRI folding against the
-  -- committed roots and the (threaded) Fiat-Shamir challenger.
+  -- PCS (FRI) verification — `two_adic_pcs::verify` + `fri::verify_fri`.
+  --
+  -- Specialised to `innerFri`: arity 2 (log_arity = 1 every round),
+  -- log_blowup = 1, log_final_poly_len = 0 (final_poly is ONE constant
+  -- coefficient ⇒ the final Horner eval is just `final_poly[0]`, no `x` needed),
+  -- num_queries = 1, commit/query PoW bits = 0 (a challenger no-op). Field
+  -- arithmetic is the non-native byte Goldilocks (`gl_*`/`eg_*`).
+  --
+  -- A reduced-opening accumulator, one per distinct log-height. `alpha_pow`
+  -- threads across every (batch, matrix, point, column) at that height, in the
+  -- prover's observation order (stage_1, stage_2, quotient, preprocessed).
   -- ==========================================================================
-  fn pcs_verify(opening: FriProof) -> G {
+  enum Bucket { Mk(G, Ext, Ext) }   -- log_height, alpha_pow, reduced_opening
+
+  -- ── challenger: observe the opened values (observe_algebra_slice) ──────────
+  -- One ext element = its two base coordinates, each 8 LE bytes.
+  fn obs_ext_row(input: ByteStream, row: List‹Ext›) -> ByteStream {
+    match load(row) {
+      ListNode.Nil => input,
+      ListNode.Cons(e, rest) => obs_ext_row(snoc_b8(snoc_b8(input, e[0]), e[1]), rest),
+    }
+  }
+  fn obs_points(input: ByteStream, pts: List‹List‹Ext››) -> ByteStream {
+    match load(pts) {
+      ListNode.Nil => input,
+      ListNode.Cons(row, rest) => obs_points(obs_ext_row(input, row), rest),
+    }
+  }
+  fn obs_round(input: ByteStream, round: OpenedRound) -> ByteStream {
+    match load(round) {
+      ListNode.Nil => input,
+      ListNode.Cons(mat, rest) => obs_round(obs_points(input, mat), rest),
+    }
+  }
+  fn obs_prep(input: ByteStream, prep_opt: PreprocessedOpt) -> ByteStream {
+    match prep_opt {
+      PreprocessedOpt.NoPreprocessed => input,
+      PreprocessedOpt.SomePreprocessed(round) => obs_round(input, round),
+    }
+  }
+  -- Observe one Val (= 1) per FRI round, the variable-arity schedule.
+  fn obs_log_arities(input: ByteStream, comms: List‹MerkleCap›) -> ByteStream {
+    match load(comms) {
+      ListNode.Nil => input,
+      ListNode.Cons(_c, rest) =>
+        obs_log_arities(snoc_b8(input, [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), rest),
+    }
+  }
+  -- Per round: observe the commitment, then sample β (PoW bits = 0 ⇒ no-op).
+  -- `GrindingChallenger::check_witness` for one commit round: when `bits > 0`,
+  -- observe the PoW witness then sample `bits` bits and assert they are all zero
+  -- (else InvalidPowWitness). Returns the post-PoW `(input, output)` so the
+  -- immediately-following β sample continues the SAME hash stream (no observe in
+  -- between). `bits == 0` is the short-circuit: no observe, no sample.
+  fn pcs_commit_pow(input: ByteStream, witness: U64, bits: G) -> (ByteStream, ByteStream) {
+    match bits {
+      0 => (input, store(ListNode.Nil)),
+      _ =>
+        let (pbits, i1, o1) = ch_sample_bits(snoc_b8(input, witness), store(ListNode.Nil), bits);
+        assert_eq!(bits_to_num(pbits), 0);
+        (i1, o1),
+    }
+  }
+  -- Per round: observe the commitment, run the commit-phase PoW check, then
+  -- sample the folding challenge β (continuing the challenger past the PoW).
+  fn pcs_betas(input: ByteStream, comms: List‹MerkleCap›, witnesses: List‹U64›, bits: G)
+      -> (List‹Ext›, ByteStream) {
+    match load(comms) {
+      ListNode.Nil => (store(ListNode.Nil), input),
+      ListNode.Cons(c, rest) =>
+        let &ListNode.Cons(w, wrest) = witnesses;
+        let (i1, o1) = pcs_commit_pow(snoc_cap(input, c), w, bits);
+        let (b0, b1, i2, _o) = ch_sample_ext(i1, o1);
+        let (bs, i3) = pcs_betas(i2, rest, wrest, bits);
+        (store(ListNode.Cons([gl_reduce(b0), gl_reduce(b1)], bs)), i3),
+    }
+  }
+
+  -- ── reduced-opening buckets ───────────────────────────────────────────────
+  fn repeat_g(v: G, n: G) -> List‹G› {
+    match n {
+      0 => store(ListNode.Nil),
+      _ => store(ListNode.Cons(v, repeat_g(v, n - 1))),
+    }
+  }
+  -- 1 iff some circuit `i < rem` has log-height `log_degrees[i] + log_blowup == h`.
+  fn circ_has_height(log_degrees: List‹U8›, log_blowup: G, rem: G, i: G, h: G) -> G {
+    match rem {
+      0 => 0,
+      _ => match eq_zero(to_field(list_lookup(log_degrees, i)) + log_blowup - h) {
+        1 => 1,
+        _ => circ_has_height(log_degrees, log_blowup, rem - 1, i + 1, h),
+      },
+    }
+  }
+  -- One bucket per distinct log-height, built DESCENDING by counting `h` down
+  -- from `log_global_max`. Each starts (alpha_pow = 1, reduced_opening = 0).
+  fn build_buckets(log_degrees: List‹U8›, log_blowup: G, num_circuits: G, h: G) -> List‹Bucket› {
+    match h {
+      0 => store(ListNode.Nil),
+      _ => match circ_has_height(log_degrees, log_blowup, num_circuits, 0, h) {
+        0 => build_buckets(log_degrees, log_blowup, num_circuits, h - 1),
+        _ => store(ListNode.Cons(
+               Bucket.Mk(h, [gl_one(), gl_zero()], [gl_zero(), gl_zero()]),
+               build_buckets(log_degrees, log_blowup, num_circuits, h - 1))),
+      },
+    }
+  }
+  -- Find the bucket at log-height `lh`, fold one matrix-point's columns into it
+  -- (`ro_fold` threads its `alpha_pow`), and write it back.
+  fn bucket_update(buckets: List‹Bucket›, lh: G, p_x: List‹[U8; 8]›, p_z: List‹Ext›,
+      q: Ext, alpha: Ext) -> List‹Bucket› {
+    match load(buckets) {
+      ListNode.Nil => store(ListNode.Nil),
+      ListNode.Cons(b, rest) =>
+        let Bucket.Mk(h, ap, ro) = b;
+        match eq_zero(h - lh) {
+          1 =>
+            let (ro2, ap2) = ro_fold(p_x, p_z, q, alpha, ro, ap);
+            store(ListNode.Cons(Bucket.Mk(h, ap2, ro2), rest)),
+          _ => store(ListNode.Cons(b, bucket_update(rest, lh, p_x, p_z, q, alpha))),
+        },
+    }
+  }
+  -- A bucket at log-height `log_blowup` would come from a trace matrix of height
+  -- 1 (a constant polynomial); then `(f(ζ) − f(x))/(ζ − x)` must be 0. Assert it
+  -- (`open_input`'s `FinalPolyMismatch` guard). No-op if no such bucket exists.
+  fn assert_blowup_zero(buckets: List‹Bucket›, log_blowup: G) -> G {
+    match load(buckets) {
+      ListNode.Nil => 1,
+      ListNode.Cons(b, rest) =>
+        let Bucket.Mk(h, _ap, ro) = b;
+        match eq_zero(h - log_blowup) {
+          1 => assert_eq!(eg_eq(ro, [gl_zero(), gl_zero()]), 1); 1,
+          _ => assert_blowup_zero(rest, log_blowup),
+        },
+    }
+  }
+  -- 1 iff the proof carries a preprocessed opened round (used for the input-proof
+  -- batch-count check).
+  fn prep_count(prep_opt: PreprocessedOpt) -> G {
+    match prep_opt {
+      PreprocessedOpt.NoPreprocessed => 0,
+      PreprocessedOpt.SomePreprocessed(_r) => 1,
+    }
+  }
+  -- Compute x = GENERATOR·g^{revbits} for this height and fold the contribution.
+  fn ri_apply(buckets: List‹Bucket›, lh: G, idxbits: List‹G›, log_gmax: G,
+      z: Ext, p_x: List‹[U8; 8]›, p_z: List‹Ext›, alpha: Ext) -> List‹Bucket› {
+    -- the base opening and the ext opening at this point must have equal width
+    -- (PointEvaluationCountMismatch); `ro_fold` walks them in lockstep.
+    assert_eq!(eq_zero(list_length(p_x) - list_length(p_z)), 1);
+    let x = ro_x(list_drop(idxbits, log_gmax - lh), lh);
+    let q = eg_inverse(eg_sub(z, [x, gl_zero()]));
+    bucket_update(buckets, lh, p_x, p_z, q, alpha)
+  }
+
+  -- A stage_1/stage_2/preprocessed-style matrix: two opening points
+  -- (ζ, ζ·g) with the same base row `p_x`. `g` = trace subgroup generator.
+  fn open_2pt_mat(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, lh: G,
+      ldeg: G, zeta: Ext, p_x: List‹[U8; 8]›, mat: List‹List‹Ext››, alpha: Ext)
+      -> List‹Bucket› {
+    let pz0 = list_lookup(mat, 0);
+    let pz1 = list_lookup(mat, 1);
+    let zn = eg_mul(zeta, [two_adic_gen(ldeg), gl_zero()]);
+    let b1 = ri_apply(buckets, lh, idxbits, log_gmax, zeta, p_x, pz0, alpha);
+    ri_apply(b1, lh, idxbits, log_gmax, zn, p_x, pz1, alpha)
+  }
+  fn open_batch_2pt(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G,
+      rem: G, log_degrees: List‹U8›, zeta: Ext, base_rows: List‹List‹U64››,
+      opened: OpenedRound, alpha: Ext) -> List‹Bucket› {
+    match rem {
+      0 => buckets,
+      _ =>
+        let ldeg = to_field(list_lookup(log_degrees, ci));
+        let b = open_2pt_mat(buckets, idxbits, log_gmax, ldeg + log_blowup, ldeg, zeta,
+                  list_lookup(base_rows, ci), list_lookup(opened, ci), alpha);
+        open_batch_2pt(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, log_degrees, zeta,
+                       base_rows, opened, alpha),
+    }
+  }
+
+  -- The quotient batch: `qd` chunks per circuit, one opening point (ζ) each.
+  fn open_q_chunks(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, lh: G,
+      chunk: G, qrem: G, zeta: Ext, base_rows: List‹List‹U64››,
+      q_opened: OpenedRound, alpha: Ext) -> (List‹Bucket›, G) {
+    match qrem {
+      0 => (buckets, chunk),
+      _ =>
+        let b = ri_apply(buckets, lh, idxbits, log_gmax, zeta,
+                  list_lookup(base_rows, chunk), list_lookup(list_lookup(q_opened, chunk), 0), alpha);
+        open_q_chunks(b, idxbits, log_gmax, lh, chunk + 1, qrem - 1, zeta, base_rows, q_opened, alpha),
+    }
+  }
+  fn open_quotient(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G,
+      rem: G, chunk: G, circuits: List‹SysCircuit›, log_degrees: List‹U8›, zeta: Ext,
+      base_rows: List‹List‹U64››, q_opened: OpenedRound, alpha: Ext) -> List‹Bucket› {
+    match rem {
+      0 => buckets,
+      _ =>
+        let SysCircuit.Mk(_a, _cc, md, _ph, _pw, _w1, _w2) = list_lookup(circuits, ci);
+        let qd = quotient_degree_of(md);
+        let lh = to_field(list_lookup(log_degrees, ci)) + log_blowup;
+        let (b, chunk2) = open_q_chunks(buckets, idxbits, log_gmax, lh, chunk, qd, zeta, base_rows, q_opened, alpha);
+        open_quotient(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, chunk2, circuits, log_degrees, zeta, base_rows, q_opened, alpha),
+    }
+  }
+
+  -- The preprocessed batch: only circuits with `prep_indices[i] = Some(j)`;
+  -- `k` tracks the position in the preprocessed commitment (= base-row index).
+  fn open_prep(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G, rem: G,
+      k: G, log_degrees: List‹U8›, prep_indices: List‹OptIdx›, zeta: Ext,
+      base_rows: List‹List‹U64››, prep_round: OpenedRound, alpha: Ext) -> List‹Bucket› {
+    match rem {
+      0 => buckets,
+      _ => match list_lookup(prep_indices, ci) {
+        OptIdx.NoIdx =>
+          open_prep(buckets, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, k, log_degrees,
+                    prep_indices, zeta, base_rows, prep_round, alpha),
+        OptIdx.SomeIdx(_j) =>
+          let ldeg = to_field(list_lookup(log_degrees, ci));
+          let b = open_2pt_mat(buckets, idxbits, log_gmax, ldeg + log_blowup, ldeg, zeta,
+                    list_lookup(base_rows, k), list_lookup(prep_round, k), alpha);
+          open_prep(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, k + 1, log_degrees,
+                    prep_indices, zeta, base_rows, prep_round, alpha),
+      },
+    }
+  }
+  fn open_prep_batch(buckets: List‹Bucket›, input_proof: List‹BatchOpening›,
+      prep_commit: MerkleCap, prep_opt: PreprocessedOpt, prep_indices: List‹OptIdx›,
+      log_degrees: List‹U8›, num_circuits: G, idxbits: List‹G›, log_gmax: G, log_blowup: G,
+      zeta: Ext, alpha: Ext) -> List‹Bucket› {
+    match prep_opt {
+      PreprocessedOpt.NoPreprocessed => buckets,
+      PreprocessedOpt.SomePreprocessed(prep_round) =>
+        let BatchOpening.Mk(rows_p, proof_p) = list_lookup(input_proof, 3);
+        -- one opened base row per preprocessed matrix (BatchOpenedValuesCountMismatch)
+        assert_eq!(eq_zero(list_length(rows_p) - list_length(prep_round)), 1);
+        assert_eq!(mmcs_verify(prep_commit, rows_p,
+          heights_prep(log_degrees, log_blowup, prep_indices, num_circuits, 0), idxbits, proof_p, log_gmax), 1);
+        open_prep(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, 0, log_degrees,
+                  prep_indices, zeta, rows_p, prep_round, alpha),
+    }
+  }
+
+  -- ── per-batch input-MMCS matrix log-heights (`log_degree + log_blowup`) ────
+  fn heights_all(log_degrees: List‹U8›, log_blowup: G, rem: G, i: G) -> List‹G› {
+    match rem {
+      0 => store(ListNode.Nil),
+      _ => store(ListNode.Cons(to_field(list_lookup(log_degrees, i)) + log_blowup,
+                               heights_all(log_degrees, log_blowup, rem - 1, i + 1))),
+    }
+  }
+  fn heights_quotient(circuits: List‹SysCircuit›, log_degrees: List‹U8›, log_blowup: G, rem: G, i: G) -> List‹G› {
+    match rem {
+      0 => store(ListNode.Nil),
+      _ =>
+        let SysCircuit.Mk(_a, _cc, md, _ph, _pw, _w1, _w2) = list_lookup(circuits, i);
+        list_concat(repeat_g(to_field(list_lookup(log_degrees, i)) + log_blowup, quotient_degree_of(md)),
+                    heights_quotient(circuits, log_degrees, log_blowup, rem - 1, i + 1)),
+    }
+  }
+  fn heights_prep(log_degrees: List‹U8›, log_blowup: G, prep_indices: List‹OptIdx›, rem: G, i: G) -> List‹G› {
+    match rem {
+      0 => store(ListNode.Nil),
+      _ => match list_lookup(prep_indices, i) {
+        OptIdx.NoIdx => heights_prep(log_degrees, log_blowup, prep_indices, rem - 1, i + 1),
+        OptIdx.SomeIdx(_j) =>
+          store(ListNode.Cons(to_field(list_lookup(log_degrees, i)) + log_blowup,
+                              heights_prep(log_degrees, log_blowup, prep_indices, rem - 1, i + 1))),
+      },
+    }
+  }
+
+  -- ── FRI fold chain (`verify_query`, arity 2) ──────────────────────────────
+  -- Reconstruct the sibling pair: evals[index_in_group] = folded, other = sib.
+  fn recon_evals(bit: G, folded: Ext, sib: Ext) -> (Ext, Ext) {
+    match bit {
+      0 => (folded, sib),
+      _ => (sib, folded),
+    }
+  }
+  -- Flatten two ext evals to the 4 base coords of the ExtensionMmcs leaf row.
+  fn flatten2(e0: Ext, e1: Ext) -> List‹U64› {
+    store(ListNode.Cons(e0[0], store(ListNode.Cons(e0[1],
+      store(ListNode.Cons(e1[0], store(ListNode.Cons(e1[1], store(ListNode.Nil)))))))))
+  }
+  -- Roll the next reduced opening into the folded eval when its height matches
+  -- the new folded height: `folded += beta^(2^log_arity) · ro`  (log_arity = 1).
+  fn rollin(folded: Ext, log_folded: G, beta: Ext, ro_rest: List‹Bucket›) -> (Ext, List‹Bucket›) {
+    match load(ro_rest) {
+      ListNode.Nil => (folded, ro_rest),
+      ListNode.Cons(b, rest) =>
+        let Bucket.Mk(h, _ap, ro) = b;
+        match eq_zero(h - log_folded) {
+          1 => (eg_add(folded, eg_mul(ext_exp_pow2(beta, 1), ro)), rest),
+          _ => (folded, ro_rest),
+        },
+    }
+  }
+  fn verify_query(folded: Ext, betas: List‹Ext›, comms: List‹MerkleCap›,
+      openings: List‹CommitPhaseProofStep›, domidx: List‹G›, log_cur: G,
+      ro_rest: List‹Bucket›, log_final: G) -> Ext {
+    match load(openings) {
+      ListNode.Nil =>
+        -- must have folded down to exactly the final domain size, and every
+        -- reduced opening must have been rolled in (FinalFoldHeightMismatch /
+        -- UnconsumedReducedOpenings).
+        assert_eq!(eq_zero(log_cur - log_final), 1);
+        assert_eq!(list_length(ro_rest), 0);
+        folded,
+      ListNode.Cons(op, op_rest) =>
+        let &ListNode.Cons(beta, beta_rest) = betas;
+        let &ListNode.Cons(comm, comm_rest) = comms;
+        let CommitPhaseProofStep.Mk(_la, sibs, oproof) = op;
+        -- arity 2 ⇒ exactly arity-1 = 1 sibling (SiblingValuesLengthMismatch).
+        assert_eq!(list_length(sibs), 1);
+        let &ListNode.Cons(ibit, idrest) = domidx;     -- index_in_group = LSB
+        let log_folded = log_cur - 1;
+        let (e0, e1) = recon_evals(ibit, folded, list_lookup(sibs, 0));
+        -- authenticate the sibling pair against this round's commitment
+        assert_eq!(mmcs_verify(comm, store(ListNode.Cons(flatten2(e0, e1), store(ListNode.Nil))),
+          store(ListNode.Cons(log_folded, store(ListNode.Nil))), idrest, oproof, log_folded), 1);
+        let folded1 = fri_fold2(idrest, log_folded, beta, e0, e1);
+        let (folded2, ro_rest2) = rollin(folded1, log_folded, beta, ro_rest);
+        verify_query(folded2, beta_rest, comm_rest, op_rest, idrest, log_folded, ro_rest2, log_final),
+    }
+  }
+
+  -- ── one FRI query ─────────────────────────────────────────────────────────
+  -- For the query index `idxbits`: build the reduced-opening accumulators,
+  -- authenticate each input batch (input MMCS), run the fold chain, and check
+  -- the final polynomial. `log_final = log_blowup` (log_final_poly_len = 0).
+  fn verify_one_query(idxbits: List‹G›, qp: QueryProof, alpha: Ext,
+      stage1: OpenedRound, stage2: OpenedRound, q_opened: OpenedRound,
+      prep_opt: PreprocessedOpt, s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap,
+      prep_commit: MerkleCap, circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›,
+      log_degrees: List‹U8›, zeta: Ext, num_circuits: G, log_blowup: G, log_gmax: G,
+      betas: List‹Ext›, commit_phase_commits: List‹MerkleCap›, final_poly: List‹Ext›,
+      num_rounds: G) -> G {
+    let QueryProof.Mk(input_proof, commit_phase_openings) = qp;
+    -- one commit-phase opening per round (QueryCommitPhaseOpeningsCountMismatch),
+    -- one input batch per commitment (InputProofBatchCountMismatch).
+    assert_eq!(eq_zero(list_length(commit_phase_openings) - num_rounds), 1);
+    assert_eq!(eq_zero(list_length(input_proof) - (3 + prep_count(prep_opt))), 1);
+    let buckets = build_buckets(log_degrees, log_blowup, num_circuits, log_gmax);
+    let BatchOpening.Mk(rows_s1, proof_s1) = list_lookup(input_proof, 0);
+    assert_eq!(eq_zero(list_length(rows_s1) - num_circuits), 1);
+    assert_eq!(mmcs_verify(s1c, rows_s1, heights_all(log_degrees, log_blowup, num_circuits, 0), idxbits, proof_s1, log_gmax), 1);
+    let buckets = open_batch_2pt(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, log_degrees, zeta, rows_s1, stage1, alpha);
+    let BatchOpening.Mk(rows_s2, proof_s2) = list_lookup(input_proof, 1);
+    assert_eq!(eq_zero(list_length(rows_s2) - num_circuits), 1);
+    assert_eq!(mmcs_verify(s2c, rows_s2, heights_all(log_degrees, log_blowup, num_circuits, 0), idxbits, proof_s2, log_gmax), 1);
+    let buckets = open_batch_2pt(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, log_degrees, zeta, rows_s2, stage2, alpha);
+    let BatchOpening.Mk(rows_q, proof_q) = list_lookup(input_proof, 2);
+    assert_eq!(eq_zero(list_length(rows_q) - list_length(q_opened)), 1);
+    assert_eq!(mmcs_verify(qc, rows_q, heights_quotient(circuits, log_degrees, log_blowup, num_circuits, 0), idxbits, proof_q, log_gmax), 1);
+    let buckets = open_quotient(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, 0, circuits, log_degrees, zeta, rows_q, q_opened, alpha);
+    let buckets = open_prep_batch(buckets, input_proof, prep_commit, prep_opt, prep_indices, log_degrees, num_circuits, idxbits, log_gmax, log_blowup, zeta, alpha);
+    -- a height-`log_blowup` (constant-poly) reduced opening must be zero
+    let _cz = assert_blowup_zero(buckets, log_blowup);
+    -- the first reduced opening must sit at log_global_max_height
+    -- (InitialReducedOpeningHeightMismatch).
+    let &ListNode.Cons(b0, ro_rest) = buckets;
+    let Bucket.Mk(h0, _ap0, folded_start) = b0;
+    assert_eq!(eq_zero(h0 - log_gmax), 1);
+    let folded = verify_query(folded_start, betas, commit_phase_commits, commit_phase_openings, idxbits, log_gmax, ro_rest, log_blowup);
+    -- final check: with log_final_poly_len = 0, eval = final_poly[0]
+    assert_eq!(eg_eq(list_lookup(final_poly, 0), folded), 1);
     1
   }
 
+  -- Loop over all `num_queries` query proofs, sampling one index per query
+  -- (consecutive `sample_bits` continue the same challenger stream).
+  fn query_loop(input: ByteStream, output: ByteStream, query_proofs: List‹QueryProof›,
+      alpha: Ext, stage1: OpenedRound, stage2: OpenedRound, q_opened: OpenedRound,
+      prep_opt: PreprocessedOpt, s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap,
+      prep_commit: MerkleCap, circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›,
+      log_degrees: List‹U8›, zeta: Ext, num_circuits: G, log_blowup: G, log_gmax: G,
+      betas: List‹Ext›, commit_phase_commits: List‹MerkleCap›, final_poly: List‹Ext›,
+      num_rounds: G) -> G {
+    match load(query_proofs) {
+      ListNode.Nil => 1,
+      ListNode.Cons(qp, rest) =>
+        let (idxbits, input2, output2) = ch_sample_bits(input, output, log_gmax);
+        let _q = verify_one_query(idxbits, qp, alpha, stage1, stage2, q_opened, prep_opt,
+          s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta, num_circuits,
+          log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds);
+        query_loop(input2, output2, rest, alpha, stage1, stage2, q_opened, prep_opt,
+          s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta, num_circuits,
+          log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds),
+    }
+  }
+
+  -- ── top-level FRI verification ────────────────────────────────────────────
+  -- `log_blowup` comes from the verifying key; `num_queries` / `commit_pow_bits`
+  -- are the protocol's FRI parameters (public). `query_pow_bits` is taken to be 0
+  -- (our system), so the query-phase grinding check is a no-op.
+  fn pcs_fri_verify(post_zeta_input: ByteStream, stage1: OpenedRound, stage2: OpenedRound,
+      q_opened: OpenedRound, prep_opt: PreprocessedOpt, opening: FriProof,
+      s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap, prep_commit: MerkleCap,
+      circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›, log_degrees: List‹U8›,
+      zeta: Ext, num_circuits: G, log_blowup: G, num_queries: G, commit_pow_bits: G) -> G {
+    let FriProof.Mk(commit_phase_commits, pw, query_proofs, final_poly, _qpw) = opening;
+    let num_rounds = list_length(commit_phase_commits);
+    -- FRI shape: one PoW witness per round, num_queries query proofs, and (since
+    -- log_final_poly_len = 0) a single final-poly coefficient.
+    assert_eq!(eq_zero(list_length(pw) - num_rounds), 1);
+    assert_eq!(eq_zero(list_length(query_proofs) - num_queries), 1);
+    assert_eq!(list_length(final_poly), 1);
+    -- challenger continuation: observe all opened values (coms_to_verify order)
+    let input = obs_round(post_zeta_input, stage1);
+    let input = obs_round(input, stage2);
+    let input = obs_round(input, q_opened);
+    let input = obs_prep(input, prep_opt);
+    -- PCS batch-combination challenge α
+    let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil));
+    let alpha = [gl_reduce(a0), gl_reduce(a1)];
+    -- per-round FRI fold challenges β (with commit-phase PoW), then observe
+    -- final_poly + the log-arity schedule.
+    let (betas, input) = pcs_betas(input, commit_phase_commits, pw, commit_pow_bits);
+    let input = obs_ext_row(input, final_poly);
+    let input = obs_log_arities(input, commit_phase_commits);
+    -- query indices + per-query verification (log_global_max_height = #rounds + log_blowup)
+    let log_gmax = num_rounds + log_blowup;
+    query_loop(input, store(ListNode.Nil), query_proofs, alpha, stage1, stage2, q_opened,
+      prep_opt, s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta,
+      num_circuits, log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds)
+  }
+
   -- ==========================================================================
   -- Self-test (validation): the keccak MMCS sponge/compression against
   -- reference values from `multi-stark`'s own hasher (`pcs_ref_values`).
@@ -413,6 +844,10 @@ def pcs := ⟦
     let d17 = mmcs_hash_row(build_range(0, 17));
     assert_eq!(assert_digest(d17, 0x388da73622e8fdd5, 0xec687be9c50d2218,
                                   0x528d145dfe6571af, 0xd2eb808dfba4703c), 1);
+    -- LEAF22: full block + 5-element partial (two permutes), >20 lanes.
+    let d22 = mmcs_hash_row(build_range(0, 22));
+    assert_eq!(assert_digest(d22, 520358013996801752, 12301199992631688477,
+      8732686820159480415, 10883226686987971725), 1);
     -- LEAF20: full block + 3-element partial (two permutes).
     let d20 = mmcs_hash_row(build_range(0, 20));
     assert_eq!(assert_digest(d20, 0xec696847be88d358, 0x202861c67ff4cec8,
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index f9c067a5..ed8ed902 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -434,7 +434,7 @@ def verifier := ⟦
   -- failed check, exactly as the Rust verifier returns `Err`.
   fn verify(proof: Proof) -> G {
     match proof {
-      Proof.Mk(_commitments, accs, _log_degrees, opening,
+      Proof.Mk(_commitments, accs, _log_degrees, _opening,
                _quotient, _preprocessed, stage_1, stage_2) =>
         -- Step 1 (shape, system-independent): the per-round opened-value lists
         -- and the accumulator list all have the same length = the circuit count.
@@ -446,9 +446,8 @@ def verifier := ⟦
 
         -- Step 2: accumulator balance — the last accumulator must be zero.
         assert_eq!(last_acc_is_zero(accs), 1);
-
-        -- Step 4: PCS opening proof (stubbed — accepts; see Pcs.lean).
-        let _pcs = pcs_verify(opening);
+        -- Step 4 (PCS/FRI) now runs inside `ood_verify`, which has the verifying
+        -- key, the challenger continuation, and the opened values it needs.
         1,
     }
   }
@@ -795,17 +794,27 @@ def verifier := ⟦
   -- commitments + log_degrees + claims, seed the lookup accumulator from the
   -- claims, then run the OOD composition/quotient check for every circuit.
   -- Returns 1 on success (any mismatch aborts via `assert_eq!`).
-  fn ood_verify(sys: Sys, proof: Proof, claims: List‹List‹U64››) -> G {
-    let Sys.Mk(_params, circuits, commit, prep_indices) = sys;
+  fn ood_verify(sys: Sys, proof: Proof, claims: List‹List‹U64››,
+      num_queries: G, commit_pow_bits: G) -> G {
+    -- `log_blowup` is part of the verifying key (CommitmentParameters); the FRI
+    -- `num_queries` / `commit_pow_bits` are protocol parameters (public inputs).
+    let Sys.Mk(params, circuits, commit, prep_indices) = sys;
+    let SysParams.Mk(log_blowup, _cap_height) = params;
     match proof {
-      Proof.Mk(commitments, accs, log_degrees, _opening,
+      Proof.Mk(commitments, accs, log_degrees, opening,
                q_opened, prep_opt, stage1, stage2) =>
         let Commitments.Mk(s1c, s2c, qc) = commitments;
         let prep_cap = opt_commit_cap(commit);
-        let (lch, fch, alpha, zeta, _post_zeta_input) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
+        let (lch, fch, alpha, zeta, post_zeta_input) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims);
         let acc0 = claims_acc([gl_zero(), gl_zero()], claims, lch, fch);
-        ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
-                 prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta),
+        -- Step 5: OOD composition/quotient identity for every circuit.
+        let _ood = ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2,
+                 prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta);
+        -- Step 4: FRI PCS proximity + opening consistency, continuing the same
+        -- Fiat-Shamir transcript past ζ (observe opened values → α, βs, query).
+        pcs_fri_verify(post_zeta_input, stage1, stage2, q_opened, prep_opt, opening,
+          s1c, s2c, qc, prep_cap, circuits, prep_indices, log_degrees, zeta,
+          list_length(circuits), log_blowup, num_queries, commit_pow_bits),
     }
   }
 
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 8b47e572..7c0fdd1c 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -55,21 +55,27 @@ def factorialProgram : Source.Toplevel := ⟦
   }
 ⟧
 
+/-- A tractable subset of the production FRI config that still exercises every
+generalized verifier path: `numQueries := 2` drives the multi-query loop,
+`commitProofOfWorkBits := 8` drives the commit-phase proof-of-work grinding
+check (one per FRI round), and `log_blowup` is read from the verifying key (the
+verifier code is blowup-value-agnostic). The full production parameters
+(`logBlowup := 2`, `numQueries := 100`, `commitProofOfWorkBits := 20`) use the
+same code paths but make *proving* (large LDE + 2²⁰ grinding per round) too slow
+for this end-to-end harness; the verifier itself handles them unchanged. -/
+def recCommitParams : Aiur.CommitmentParameters :=
+  { logBlowup := 2, capHeight := 0 }
+def innerFri : FriParameters :=
+  { logFinalPolyLen := 0, maxLogArity := 1, numQueries := 3,
+    commitProofOfWorkBits := 20, queryProofOfWorkBits := 0 }
+
 /-- Compile a toplevel and build its proving system, or fail with a message. -/
 def buildSystem (label : String) (top : Source.Toplevel) :
     IO (CompiledToplevel × AiurSystem) := do
   let compiled ← match top.compile with
     | .error e => throw <| IO.userError s!"{label}: compilation failed: {e}"
     | .ok c => pure c
-  pure (compiled, AiurSystem.build compiled.bytecode commitmentParameters)
-
-/-- Minimal FRI parameters for the *inner* proof: keccak-256 over the serialized
-proof runs one keccak-f per 136 bytes, so we keep the proof small (≈ a few KB)
-to make the in-circuit hash tractable to execute. Security of the inner proof is
-irrelevant for this end-to-end test. -/
-def innerFri : FriParameters :=
-  { logFinalPolyLen := 0, maxLogArity := 1, numQueries := 1,
-    commitProofOfWorkBits := 0, queryProofOfWorkBits := 0 }
+  pure (compiled, AiurSystem.build compiled.bytecode recCommitParams)
 
 def main : IO UInt32 := do
   -- ── 1. factorial system ──────────────────────────────────────────────────
@@ -116,9 +122,11 @@ def main : IO UInt32 := do
   let claimsDigestInput : Array Aiur.G := claimsDigest.data.map .ofUInt8
   let claimGs : Array Aiur.G := claimBytes.data.map .ofUInt8
 
-  -- Public input = proof digest ++ vk digest ++ claims digest;
-  -- IO hints: proof at [0], vk at [1], claims at [2].
-  let input : Array Aiur.G := digestInput ++ sysDigestInput ++ claimsDigestInput
+  -- Public input = proof digest ++ vk digest ++ claims digest ++ FRI params
+  -- (num_queries, commit_pow_bits); IO hints: proof at [0], vk at [1], claims at [2].
+  let friParamInput : Array Aiur.G :=
+    #[Aiur.G.ofNat innerFri.numQueries, Aiur.G.ofNat innerFri.commitProofOfWorkBits]
+  let input : Array Aiur.G := digestInput ++ sysDigestInput ++ claimsDigestInput ++ friParamInput
   let verifierIO : IOBuffer :=
     (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
 
@@ -181,7 +189,7 @@ def main : IO UInt32 := do
     let badClaimBytes := serializeClaims #[badClaim]
     let badClaimsDigest := Keccak.hash badClaimBytes
     let badClaimInput : Array Aiur.G :=
-      digestInput ++ sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8)
+      digestInput ++ sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8) ++ friParamInput
     let badClaimIO : IOBuffer :=
       (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend
         #[Aiur.G.ofNat 2] (badClaimBytes.data.map .ofUInt8)

From 1e36966e7795cb76c840c2fe0e7285ca6e287502 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Wed, 10 Jun 2026 16:03:29 -0300
Subject: [PATCH 25/28] Fully non-deterministic proof

---
 Ix/MultiStark.lean          | 38 ++++++++++++++-------------
 Ix/MultiStark/Verifier.lean | 51 ++++++++++++++++++++++++-------------
 RecursiveVerifier.lean      | 44 +++++++++++++++++---------------
 3 files changed, 78 insertions(+), 55 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index 5a534bd5..ad769fd2 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -12,14 +12,21 @@ public import Ix.MultiStark.Verifier
 /-!
 # Multi-STARK proof verifier (Aiur)
 
-A recursive-verifier scaffold that (a) deserializes a `multi_stark::prover::Proof`
-(`Ix/MultiStark/Deserialize.lean`) and (b) binds the received byte stream to a
-public digest: it recomputes `keccak256` (`Ix/MultiStark/Keccak.lean`, the hash
-multi-stark uses) over the bytes and asserts it equals the digest passed as
-public input.
+The recursive verifier. Its public statement is purely existential: *"there
+exists a valid multi-stark proof, under the FRI parameters given as public
+input, for the constraint system with this keccak-256 digest and these public
+claims."* The proof itself is **non-deterministic advice** (fed on IO key `[0]`,
+never hashed or otherwise bound as a public input): the Fiat-Shamir transcript
+replay plus the Merkle/OOD/FRI checks are exactly what make any accepted advice
+a valid proof — a hash binding of the proof bytes would add nothing to the
+statement, while costing one keccak-f per 136 bytes in-circuit.
 
-The remaining verification logic (FRI, Merkle, Fiat-Shamir) will hang off
-`read_proof`.
+The verifying key and claims, by contrast, ARE digest-bound (`system_digest`,
+`claims_digest`): they determine *what was proven*.
+
+Fixed protocol assumptions (our system): `queryProofOfWorkBits = 0`,
+`capHeight = 0`, `maxLogArity = 1`, `logFinalPolyLen = 0`. The variable FRI
+parameters (`num_queries`, `commit_pow_bits`, `log_blowup`) are public inputs.
 -/
 
 public section
@@ -27,19 +34,16 @@ public section
 namespace MultiStark
 
 def entrypoints := ⟦
-  -- Public input: the 32-byte keccak-256 digest of the proof, as 4 little-endian
-  -- byte lanes. Read the proof bytes non-deterministically from IO key `[0]`,
-  -- deserialize into a `Proof` object (asserting the whole stream was consumed),
-  -- then recompute keccak-256 over the same bytes and assert it equals `digest`
-  -- — binding the IO-fed bytes to the public commitment.
-  pub fn verify_multi_stark_proof(digest: [[U8; 8]; 4], system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4], num_queries: G, commit_pow_bits: G) {
-    -- Proof from IO key [0]: deserialize, assert fully consumed, and bind the
-    -- bytes to the public keccak-256 `digest`.
+  -- Public inputs: the keccak-256 digests of the verifying key and the claims
+  -- (4 little-endian u64 lanes each) plus the variable FRI parameters. The
+  -- proof is pure non-deterministic advice on IO key `[0]` — see the module
+  -- docstring.
+  pub fn verify_multi_stark_proof(system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4], num_queries: G, commit_pow_bits: G, log_blowup: G) {
+    -- Proof advice from IO key [0]: deserialize, assert fully consumed.
     let (idx, len) = io_get_info([0]);
     let bytes = #read_byte_stream(idx, len);
     let (proof, rest) = read_proof(bytes);
     assert_eq!(load(rest), ListNode.Nil);
-    assert_eq!(keccak256(bytes), digest);
     -- Verifying key (`System<AiurCircuit>`) from IO key [1]: bind the bytes to
     -- the public keccak-256 `system_digest`, then reconstruct the system.
     let (sidx, slen) = io_get_info([1]);
@@ -60,7 +64,7 @@ def entrypoints := ⟦
     assert_eq!(verify(proof), 1);
     -- Step 3 + 5: prover-faithful Fiat-Shamir replay and the out-of-domain
     -- composition/quotient check, `composition(ζ)·inv_vanishing(ζ) == quotient(ζ)`.
-    assert_eq!(ood_verify(sys, proof, claims, num_queries, commit_pow_bits), 1);
+    assert_eq!(ood_verify(sys, proof, claims, num_queries, commit_pow_bits, log_blowup), 1);
     ()
   }
 ⟧
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index ed8ed902..af4aeac0 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -47,10 +47,9 @@ The Rust verifier runs these steps:
   the verifier accepts the honest proof and rejects a tampered claim.
 
 ### Stubbed / TODO
-* `fiat_shamir` does no rejection sampling: a sampled 8-byte limb in the band
-  `[p, 2⁶⁴)` (probability ≈ 2⁻³²) is reduced mod p instead of being rejected
-  and resampled. For small proofs no sample lands in the band, but honest
-  rejection sampling is needed for full generality.
+* Base-field samples are rejection-sampled (`ch_sample_field`): a raw 8-byte
+  limb in the band `[p, 2⁶⁴)` (probability ≈ 2⁻³²) is discarded and redrawn,
+  consuming challenger bytes exactly as `SerializingChallenger64::sample` does.
 * The PCS opening proof (`pcs_verify`) is still an accept-stub. With the PCS
   stubbed, this verifier checks every algebraic relation except FRI proximity,
   so it is not yet fully sound.
@@ -157,14 +156,28 @@ def verifier := ⟦
     ([b0, b1, b2, b3, b4, b5, b6, b7], i7, o7)
   }
 
+  -- Sample one base-field element with REJECTION SAMPLING, mirroring
+  -- `SerializingChallenger64::sample`'s inner loop: draw 8 bytes as a LE u64
+  -- (the `log2_ceil(p) = 64` mask is a no-op for Goldilocks); if the raw value
+  -- is ≥ p (probability ≈ 2⁻³²), DISCARD it and draw the next 8 bytes — a
+  -- rejected draw consumes challenger bytes, shifting every later sample,
+  -- exactly as in the reference. `raw < p` ⟺ `sub8(raw, p)` borrows. The
+  -- accepted limb is canonical (< p) by construction.
+  fn ch_sample_field(input: ByteStream, output: ByteStream) -> ([U8; 8], ByteStream, ByteStream) {
+    let (raw, i1, o1) = ch_sample8(input, output);
+    let (_d, borrow) = sub8(raw, gl_p());
+    match borrow {
+      1 => (raw, i1, o1),
+      _ => ch_sample_field(i1, o1),
+    }
+  }
+
   -- Sample a degree-2 extension element: two base samples (`from_basis_*`),
-  -- returning their raw 8-byte LE limbs (so they can be re-observed) and the
-  -- threaded challenger. NOTE: the ~2⁻³² rejection band (limb ≥ p) is not
-  -- handled — `limb_to_field` reduces mod p instead. Honest rejection sampling
-  -- is a TODO for bit-exact agreement with the prover.
+  -- each rejection-sampled, returning their 8-byte LE limbs (canonical, but
+  -- also re-observable as raw bytes) and the threaded challenger.
   fn ch_sample_ext(input: ByteStream, output: ByteStream) -> ([U8; 8], [U8; 8], ByteStream, ByteStream) {
-    let (c0, i0, o0) = ch_sample8(input, output);
-    let (c1, i1, o1) = ch_sample8(i0, o0);
+    let (c0, i0, o0) = ch_sample_field(input, output);
+    let (c1, i1, o1) = ch_sample_field(i0, o0);
     (c0, c1, i1, o1)
   }
 
@@ -239,8 +252,8 @@ def verifier := ⟦
 
   -- Sample a degree-2 extension element, threading BOTH challenger buffers so a
   -- following consecutive sample continues from the same hash `output` stream
-  -- (no re-flush). Limbs are reduced mod p (`limb_to_field`); the ~2⁻³²
-  -- rejection band where a raw limb ≥ p is still unhandled (as in `fiat_shamir`).
+  -- (no re-flush). Limbs are rejection-sampled (`ch_sample_field`), so they are
+  -- canonical; the `gl_reduce` is a no-op kept for type/intent clarity.
   fn pcs_sample_ext(input: ByteStream, output: ByteStream)
       -> (Ext, ByteStream, ByteStream) {
     let (c0, c1, i1, o1) = ch_sample_ext(input, output);
@@ -312,9 +325,9 @@ def verifier := ⟦
   -- `observe` clears the challenger's output buffer, and every sample here is
   -- preceded by an observe, so each `ch_sample_ext` re-flushes from an empty
   -- output (hence the `store(ListNode.Nil)` output argument each time).
-  -- NOTE: the ~2⁻³² rejection band (a sampled 8-byte limb ≥ p) is still not
-  -- handled (`ch_sample8` reduces mod p); for small proofs no sample lands in
-  -- the band, but honest rejection sampling remains a TODO for full generality.
+  -- Every sample is rejection-sampled (`ch_sample_field` inside
+  -- `ch_sample_ext`), so a limb in the band `[p, 2⁶⁴)` is redrawn exactly as in
+  -- the reference challenger, and the limbs observed back are canonical.
   -- Also returns the post-ζ challenger `input` buffer, which the PCS phase
   -- (Phase 4+) continues observing into. The leftover `output` after the ζ
   -- sample is discarded — the next challenger op is an observe (of the opened
@@ -795,11 +808,13 @@ def verifier := ⟦
   -- claims, then run the OOD composition/quotient check for every circuit.
   -- Returns 1 on success (any mismatch aborts via `assert_eq!`).
   fn ood_verify(sys: Sys, proof: Proof, claims: List‹List‹U64››,
-      num_queries: G, commit_pow_bits: G) -> G {
-    -- `log_blowup` is part of the verifying key (CommitmentParameters); the FRI
-    -- `num_queries` / `commit_pow_bits` are protocol parameters (public inputs).
+      num_queries: G, commit_pow_bits: G, log_blowup_pub: G) -> G {
+    -- The FRI parameters (`num_queries`, `commit_pow_bits`, `log_blowup`) are
+    -- public inputs. `log_blowup` also lives in the (digest-bound) verifying
+    -- key's CommitmentParameters — assert the two agree.
     let Sys.Mk(params, circuits, commit, prep_indices) = sys;
     let SysParams.Mk(log_blowup, _cap_height) = params;
+    assert_eq!(eq_zero(log_blowup - log_blowup_pub), 1);
     match proof {
       Proof.Mk(commitments, accs, log_degrees, opening,
                q_opened, prep_opt, stage1, stage2) =>
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 7c0fdd1c..39a1a3ec 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -97,13 +97,10 @@ def main : IO UInt32 := do
   | .error e => IO.eprintln s!"inner proof failed to verify: {e}"; return 1
   | .ok _ => IO.println "✓ inner proof verifies"
 
-  -- ── 3. serialize proof, compute its keccak-256 digest (the public input) ──
+  -- ── 3. serialize the proof — pure non-deterministic advice (NOT hashed, not
+  --       a public input; the verifier's own checks are what bind it) ─────────
   let proofBytes := proof.toBytes
-  let blocks := (proofBytes.size + 1) / 136 + 1
-  IO.println s!"  serialized proof: {proofBytes.size} bytes  (~{blocks} keccak-f blocks)"
-  let digest := Keccak.hash proofBytes
-  IO.println s!"  keccak256(proof) = {toHex digest}"
-  let digestInput : Array Aiur.G := digest.data.map .ofUInt8
+  IO.println s!"  serialized proof: {proofBytes.size} bytes (advice, not digest-bound)"
   let proofGs : Array Aiur.G := proofBytes.data.map .ofUInt8
 
   -- Verifying key (`System<AiurCircuit>`) bytes + its keccak-256 digest.
@@ -122,11 +119,12 @@ def main : IO UInt32 := do
   let claimsDigestInput : Array Aiur.G := claimsDigest.data.map .ofUInt8
   let claimGs : Array Aiur.G := claimBytes.data.map .ofUInt8
 
-  -- Public input = proof digest ++ vk digest ++ claims digest ++ FRI params
-  -- (num_queries, commit_pow_bits); IO hints: proof at [0], vk at [1], claims at [2].
+  -- Public input = vk digest ++ claims digest ++ FRI params (num_queries,
+  -- commit_pow_bits, log_blowup); IO hints: proof at [0], vk at [1], claims at [2].
   let friParamInput : Array Aiur.G :=
-    #[Aiur.G.ofNat innerFri.numQueries, Aiur.G.ofNat innerFri.commitProofOfWorkBits]
-  let input : Array Aiur.G := digestInput ++ sysDigestInput ++ claimsDigestInput ++ friParamInput
+    #[Aiur.G.ofNat innerFri.numQueries, Aiur.G.ofNat innerFri.commitProofOfWorkBits,
+      Aiur.G.ofNat recCommitParams.logBlowup]
+  let input : Array Aiur.G := sysDigestInput ++ claimsDigestInput ++ friParamInput
   let verifierIO : IOBuffer :=
     (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
 
@@ -168,18 +166,24 @@ def main : IO UInt32 := do
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1
 
-  -- ── 5. run the verifier: deserialize proof + vk, recompute keccak digests,
-  --       reconstruct the System<AiurCircuit>, run structural checks ──────────
-  IO.println "running verifier (proof + verifying-key deserialize + keccak binding)…"
+  -- ── 5. run the verifier: deserialize proof (advice) + vk, replay
+  --       Fiat-Shamir, run the OOD + FRI checks ─────────────────────────────
+  IO.println "running verifier (advice proof + vk digest binding + OOD + FRI)…"
   match vCompiled.bytecode.execute vIdx input verifierIO with
   | .error e => IO.eprintln s!"✗ verifier rejected: {e}"; return 1
   | .ok (_, _, queryCounts) =>
-    IO.println "✓ verifier accepted: proof + vk deserialized, both keccak digests match"
-    -- ── 6. negative test: a tampered proof digest must be rejected ──────────
-    let badInput := input.set! 0 (Aiur.G.ofNat ((digest.data[0]!.toNat + 1) % 256))
-    match vCompiled.bytecode.execute vIdx badInput verifierIO with
-    | .error _ => IO.println "✓ tampered digest correctly rejected (assert_eq failed)"
-    | .ok _ => IO.eprintln "✗ tampered digest was NOT rejected"; return 1
+    IO.println "✓ verifier accepted: vk digest bound, OOD + FRI checks pass"
+    -- ── 6. negative test: tampered proof ADVICE must be rejected by the
+    --        verification checks themselves (no digest binding any more) —
+    --        this isolates the Fiat-Shamir/Merkle/OOD/FRI rejection path.
+    --        Byte 0 is the first stage_1-commitment limb: it diverges the
+    --        replayed transcript from the one the proof was generated under.
+    let badProofGs := proofGs.set! 0 (Aiur.G.ofNat ((proofBytes.data[0]!.toNat + 1) % 256))
+    let badProofIO : IOBuffer :=
+      (((default : IOBuffer).extend #[Aiur.G.ofNat 0] badProofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
+    match vCompiled.bytecode.execute vIdx input badProofIO with
+    | .error _ => IO.println "✓ tampered proof advice correctly rejected (verification checks)"
+    | .ok _ => IO.eprintln "✗ tampered proof advice was NOT rejected"; return 1
     -- ── 6b. negative test: a tampered CLAIM (with a matching keccak digest)
     --        must be rejected by the OOD / accumulator check. Changing the
     --        claim feeds a different value into Fiat-Shamir (→ different ζ) and
@@ -189,7 +193,7 @@ def main : IO UInt32 := do
     let badClaimBytes := serializeClaims #[badClaim]
     let badClaimsDigest := Keccak.hash badClaimBytes
     let badClaimInput : Array Aiur.G :=
-      digestInput ++ sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8) ++ friParamInput
+      sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8) ++ friParamInput
     let badClaimIO : IOBuffer :=
       (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend
         #[Aiur.G.ofNat 2] (badClaimBytes.data.map .ofUInt8)

From 409f7e539af4bef46ba8ccf2694f06eba71b6006 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Thu, 11 Jun 2026 11:37:36 -0300
Subject: [PATCH 26/28] Tests in a different toplevel

---
 Ix/MultiStark.lean            |  10 ++
 Ix/MultiStark/Goldilocks.lean |  70 --------
 Ix/MultiStark/Pcs.lean        | 143 -----------------
 Ix/MultiStark/Tests.lean      | 294 ++++++++++++++++++++++++++++++++++
 Ix/MultiStark/Verifier.lean   |  44 -----
 RecursiveVerifier.lean        |  53 +++---
 6 files changed, 328 insertions(+), 286 deletions(-)
 create mode 100644 Ix/MultiStark/Tests.lean

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index ad769fd2..cf870e81 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -8,6 +8,7 @@ public import Ix.MultiStark.Keccak
 public import Ix.MultiStark.Pcs
 public import Ix.MultiStark.SystemDeserialize
 public import Ix.MultiStark.Verifier
+public import Ix.MultiStark.Tests
 
 /-!
 # Multi-STARK proof verifier (Aiur)
@@ -82,6 +83,15 @@ def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do
   let t ← t.merge verifier
   t.merge entrypoints
 
+/-- The verifier toplevel PLUS its self-test entrypoints
+(`Ix/MultiStark/Tests.lean`). Kept separate from `multiStark` because every
+`pub fn` adds a circuit to the compiled system — the production verifier should
+not carry test-only width. Use this toplevel only to run the `*_test`
+entrypoints. -/
+def multiStarkTests : Except Aiur.Global Aiur.Source.Toplevel := do
+  let t ← multiStark
+  t.merge tests
+
 end MultiStark
 
 end
diff --git a/Ix/MultiStark/Goldilocks.lean b/Ix/MultiStark/Goldilocks.lean
index 04959ab0..6bfc8c3a 100644
--- a/Ix/MultiStark/Goldilocks.lean
+++ b/Ix/MultiStark/Goldilocks.lean
@@ -311,76 +311,6 @@ def goldilocks := ⟦
     gl_eq(a[0], b[0]) * gl_eq(a[1], b[1])
   }
 
-  -- ==========================================================================
-  -- Self-test (vs `gl_ops_ref`).
-  -- ==========================================================================
-  fn assert_g8(x: Goldilocks, e: Goldilocks) -> G {
-    assert_eq!(to_field(x[0]), to_field(e[0]));
-    assert_eq!(to_field(x[1]), to_field(e[1]));
-    assert_eq!(to_field(x[2]), to_field(e[2]));
-    assert_eq!(to_field(x[3]), to_field(e[3]));
-    assert_eq!(to_field(x[4]), to_field(e[4]));
-    assert_eq!(to_field(x[5]), to_field(e[5]));
-    assert_eq!(to_field(x[6]), to_field(e[6]));
-    assert_eq!(to_field(x[7]), to_field(e[7]));
-    1
-  }
-  pub fn gl_addsub_test() -> G {
-    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
-    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
-    assert_eq!(assert_g8(gl_add(a, b), [255u8, 16u8, 17u8, 17u8, 18u8, 17u8, 17u8, 17u8]), 1);
-    assert_eq!(assert_g8(gl_sub(a, b), [32u8, 83u8, 151u8, 219u8, 31u8, 100u8, 168u8, 236u8]), 1);
-    assert_eq!(assert_g8(gl_sub(b, a), [225u8, 172u8, 104u8, 36u8, 223u8, 155u8, 87u8, 19u8]), 1);
-    -- edge: (p-1) + 5 ≡ 4 ; 5 - (p-1) ≡ 6
-    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8]; -- 0xFFFFFFFF00000000
-    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
-    assert_eq!(assert_g8(gl_add(pm1, five), [4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
-    assert_eq!(assert_g8(gl_sub(five, pm1), [6u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
-    1
-  }
-
-  fn assert_eg(x: ExtGoldilocks, e0: Goldilocks, e1: Goldilocks) -> G {
-    assert_eq!(assert_g8(x[0], e0), 1);
-    assert_eq!(assert_g8(x[1], e1), 1);
-    1
-  }
-  pub fn gl_muldiv_test() -> G {
-    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
-    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
-    assert_eq!(assert_g8(gl_mul(a, b), [212u8, 186u8, 123u8, 108u8, 31u8, 253u8, 234u8, 250u8]), 1);
-    assert_eq!(assert_g8(gl_inverse(a), [97u8, 29u8, 109u8, 46u8, 183u8, 100u8, 8u8, 102u8]), 1);
-    assert_eq!(assert_g8(gl_div(a, b), [63u8, 59u8, 61u8, 54u8, 46u8, 255u8, 29u8, 186u8]), 1);
-    -- edge: (p-1)·5 ≡ p-5
-    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8];
-    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
-    assert_eq!(assert_g8(gl_mul(pm1, five), [252u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8]), 1);
-    -- a·a⁻¹ = 1 and b·b⁻¹ = 1
-    assert_eq!(assert_g8(gl_mul(a, gl_inverse(a)), gl_one()), 1);
-    assert_eq!(assert_g8(gl_mul(b, gl_inverse(b)), gl_one()), 1);
-    1
-  }
-  pub fn eg_ops_test() -> G {
-    -- e0 = (0xFEDCBA9876543210, 0x0123456789ABCDEF), e1 = (0x1111111122222222, 0x3333333344444444)
-    let e0 = [[16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8],
-              [239u8, 205u8, 171u8, 137u8, 103u8, 69u8, 35u8, 1u8]];
-    let e1 = [[34u8, 34u8, 34u8, 34u8, 17u8, 17u8, 17u8, 17u8],
-              [68u8, 68u8, 68u8, 68u8, 51u8, 51u8, 51u8, 51u8]];
-    assert_eq!(assert_eg(eg_add(e0, e1),
-      [49u8, 84u8, 118u8, 152u8, 170u8, 203u8, 237u8, 15u8],
-      [51u8, 18u8, 240u8, 205u8, 154u8, 120u8, 86u8, 52u8]), 1);
-    assert_eq!(assert_eg(eg_mul(e0, e1),
-      [10u8, 238u8, 162u8, 36u8, 224u8, 127u8, 182u8, 134u8],
-      [215u8, 234u8, 152u8, 224u8, 219u8, 254u8, 32u8, 67u8]), 1);
-    assert_eq!(assert_eg(eg_inverse(e0),
-      [221u8, 238u8, 29u8, 131u8, 179u8, 89u8, 214u8, 216u8],
-      [114u8, 99u8, 206u8, 108u8, 15u8, 88u8, 161u8, 246u8]), 1);
-    assert_eq!(assert_eg(eg_div(e0, e1),
-      [42u8, 59u8, 64u8, 77u8, 226u8, 214u8, 95u8, 63u8],
-      [200u8, 46u8, 148u8, 147u8, 124u8, 180u8, 248u8, 140u8]), 1);
-    -- e0 · e0⁻¹ = 1
-    assert_eq!(assert_eg(eg_mul(e0, eg_inverse(e0)), gl_one(), gl_zero()), 1);
-    1
-  }
 ⟧
 
 end MultiStark
diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean
index 2bab129d..024adb87 100644
--- a/Ix/MultiStark/Pcs.lean
+++ b/Ix/MultiStark/Pcs.lean
@@ -303,23 +303,6 @@ def pcs := ⟦
     eg_add(t1, t2)
   }
 
-  -- Self-test: arity-2 fold at index 5, log_height 3 vs the `fri_fold_ref`
-  -- reference (computed by the real `TwoAdicFriFolding::fold_row`).
-  pub fn fri_fold_test() -> G {
-    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
-                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
-    let e0 = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
-              [34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8]];
-    let e1 = [[51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8],
-              [68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8]];
-    let beta = [[85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8],
-                [102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8]];
-    let folded = fri_fold2(index_bits, 3, beta, e0, e1);
-    assert_eq!(limb_to_field(folded[0]), 9349172584842537206);
-    assert_eq!(limb_to_field(folded[1]), 984486879173118962);
-    1
-  }
-
   -- ==========================================================================
   -- `open_input` reduced openings (`fri/verifier.rs::open_input` inner loop).
   --
@@ -351,34 +334,6 @@ def pcs := ⟦
     }
   }
 
-  -- Self-test vs `ro_fold_ref`: x at index 5 / log_height 3, then accumulate
-  -- (p_z − p_x)/(z − x) over 3 columns with alpha powers.
-  pub fn ro_fold_test() -> G {
-    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
-                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
-    let x = ro_x(index_bits, 3);
-    assert_eq!(limb_to_field(x), 117440512);
-    let z = [[154u8, 120u8, 86u8, 52u8, 18u8, 0u8, 0u8, 0u8],
-             [1u8, 239u8, 205u8, 171u8, 0u8, 0u8, 0u8, 0u8]];
-    let alpha = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
-                 [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
-    let px0 = [11u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
-    let px1 = [22u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
-    let px2 = [33u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
-    let p_x = store(ListNode.Cons(px0, store(ListNode.Cons(px1,
-                store(ListNode.Cons(px2, store(ListNode.Nil)))))));
-    let pz0 = [[100u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
-    let pz1 = [[200u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
-    let pz2 = [[44u8, 1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
-    let p_z = store(ListNode.Cons(pz0, store(ListNode.Cons(pz1,
-                store(ListNode.Cons(pz2, store(ListNode.Nil)))))));
-    let q = eg_inverse(eg_sub(z, [x, gl_zero()]));
-    let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [gl_zero(), gl_zero()], [gl_one(), gl_zero()]);
-    assert_eq!(limb_to_field(ro[0]), 7130765474285082575);
-    assert_eq!(limb_to_field(ro[1]), 12254464995725315436);
-    1
-  }
-
   -- ==========================================================================
   -- PCS (FRI) verification — `two_adic_pcs::verify` + `fri::verify_fri`.
   --
@@ -810,104 +765,6 @@ def pcs := ⟦
       num_circuits, log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds)
   }
 
-  -- ==========================================================================
-  -- Self-test (validation): the keccak MMCS sponge/compression against
-  -- reference values from `multi-stark`'s own hasher (`pcs_ref_values`).
-  -- Compares each output lane mod p via `limb_to_field` (all reference lanes are
-  -- canonical, so this is exact).
-  -- ==========================================================================
-
-  -- A small single-byte value as a `U64` (8 LE bytes).
-  fn u64_of(b: U8) -> U64 {
-    [b, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]
-  }
-  -- `[i+1, i+2, …, n]` as a `List‹U64›`.
-  fn build_range(i: G, n: G) -> List‹U64› {
-    match n - i {
-      0 => store(ListNode.Nil),
-      _ => store(ListNode.Cons(u64_of(u8_from_field_unsafe(i + 1)), build_range(i + 1, n))),
-    }
-  }
-  fn assert_digest(d: Digest, e0: G, e1: G, e2: G, e3: G) -> G {
-    assert_eq!(limb_to_field(d[0]), e0);
-    assert_eq!(limb_to_field(d[1]), e1);
-    assert_eq!(limb_to_field(d[2]), e2);
-    assert_eq!(limb_to_field(d[3]), e3);
-    1
-  }
-  pub fn pcs_hash_test() -> G {
-    -- LEAF3: hash([1,2,3])
-    let d3 = mmcs_hash_row(build_range(0, 3));
-    assert_eq!(assert_digest(d3, 0xc55a6a1beaea9fec, 0xc8f0dbc4c59ec440,
-                                 0xacb1295de9bfe032, 0x445d569d3dfc9543), 1);
-    -- LEAF17: exactly one full block, no extra permute.
-    let d17 = mmcs_hash_row(build_range(0, 17));
-    assert_eq!(assert_digest(d17, 0x388da73622e8fdd5, 0xec687be9c50d2218,
-                                  0x528d145dfe6571af, 0xd2eb808dfba4703c), 1);
-    -- LEAF22: full block + 5-element partial (two permutes), >20 lanes.
-    let d22 = mmcs_hash_row(build_range(0, 22));
-    assert_eq!(assert_digest(d22, 520358013996801752, 12301199992631688477,
-      8732686820159480415, 10883226686987971725), 1);
-    -- LEAF20: full block + 3-element partial (two permutes).
-    let d20 = mmcs_hash_row(build_range(0, 20));
-    assert_eq!(assert_digest(d20, 0xec696847be88d358, 0x202861c67ff4cec8,
-                                  0x88e006a48aaa0661, 0xabaddb9d32ecd024), 1);
-    -- COMPRESS([1,2,3,4],[5,6,7,8])
-    let c = mmcs_compress([u64_of(1u8), u64_of(2u8), u64_of(3u8), u64_of(4u8)],
-                          [u64_of(5u8), u64_of(6u8), u64_of(7u8), u64_of(8u8)]);
-    assert_eq!(assert_digest(c, 0xda1ef0642722b22e, 0x4851efdbdb2a2fd8,
-                                0x37e8ff900ea95d47, 0xa153eee7805376fb), 1);
-    1
-  }
-
-  -- Merkle `verify_batch` self-test against the `pcs_merkle_ref` reference: a
-  -- cap-height-0 tree over 3 matrices of heights 8/4/2 (log-heights 3/2/1),
-  -- opened at index 5 (path bits 1,0,1). Checks the recomputed root matches the
-  -- committed root and that the cap index is 0, then that a tampered opened row
-  -- yields a different (rejected) root.
-  pub fn pcs_merkle_test() -> G {
-    -- opened rows (matrix order): m0 row5, m1 row2, m2 row1.
-    let row0 = store(ListNode.Cons(u64_of(11u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
-    let row1 = store(ListNode.Cons(u64_of(107u8), store(ListNode.Cons(u64_of(108u8),
-                 store(ListNode.Cons(u64_of(109u8), store(ListNode.Nil)))))));
-    let row2 = store(ListNode.Cons(u64_of(202u8), store(ListNode.Nil)));
-    let rows = store(ListNode.Cons(row0, store(ListNode.Cons(row1,
-                 store(ListNode.Cons(row2, store(ListNode.Nil)))))));
-    -- log-heights and path bits (index 5 = 0b101, LSB first).
-    let lhs = store(ListNode.Cons(3, store(ListNode.Cons(2, store(ListNode.Cons(1, store(ListNode.Nil)))))));
-    let ibits = store(ListNode.Cons(1, store(ListNode.Cons(0, store(ListNode.Cons(1, store(ListNode.Nil)))))));
-    -- authentication path SIB0, SIB1, SIB2 (each a Digest = [U64; 4]).
-    let sib0 = [[9u8, 36u8, 179u8, 127u8, 205u8, 83u8, 105u8, 203u8],
-                [95u8, 229u8, 105u8, 223u8, 113u8, 55u8, 97u8, 122u8],
-                [135u8, 8u8, 65u8, 248u8, 163u8, 163u8, 68u8, 81u8],
-                [9u8, 11u8, 20u8, 209u8, 10u8, 168u8, 151u8, 125u8]];
-    let sib1 = [[227u8, 58u8, 255u8, 213u8, 77u8, 152u8, 42u8, 77u8],
-                [113u8, 86u8, 2u8, 151u8, 97u8, 63u8, 58u8, 45u8],
-                [228u8, 139u8, 228u8, 194u8, 182u8, 115u8, 107u8, 221u8],
-                [248u8, 16u8, 30u8, 93u8, 176u8, 36u8, 205u8, 88u8]];
-    let sib2 = [[236u8, 144u8, 115u8, 218u8, 140u8, 5u8, 86u8, 229u8],
-                [95u8, 186u8, 252u8, 175u8, 21u8, 247u8, 153u8, 25u8],
-                [113u8, 78u8, 92u8, 200u8, 212u8, 175u8, 247u8, 47u8],
-                [78u8, 145u8, 206u8, 54u8, 175u8, 155u8, 165u8, 206u8]];
-    let proof = store(ListNode.Cons(sib0, store(ListNode.Cons(sib1,
-                  store(ListNode.Cons(sib2, store(ListNode.Nil)))))));
-    let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, 3);
-    assert_eq!(capidx, 0);
-    assert_eq!(assert_digest(root, 0x6211b9a1a116a006, 0x435ee98e1504880f,
-                                   0x900c7274b9a215f, 0xf6e3aaac5dcd90bd), 1);
-    -- tamper: perturb m0's first opened value → root must change.
-    let bad0 = store(ListNode.Cons(u64_of(99u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
-    let bad_rows = store(ListNode.Cons(bad0, store(ListNode.Cons(row1,
-                     store(ListNode.Cons(row2, store(ListNode.Nil)))))));
-    let cap = store(ListNode.Cons([[6u8, 160u8, 22u8, 161u8, 161u8, 185u8, 17u8, 98u8],
-                                   [15u8, 136u8, 4u8, 21u8, 142u8, 233u8, 94u8, 67u8],
-                                   [95u8, 33u8, 154u8, 75u8, 39u8, 199u8, 0u8, 9u8],
-                                   [189u8, 144u8, 205u8, 93u8, 172u8, 170u8, 227u8, 246u8]],
-                    store(ListNode.Nil)));
-    assert_eq!(mmcs_verify(cap, rows, lhs, ibits, proof, 3), 1);
-    assert_eq!(mmcs_verify(cap, bad_rows, lhs, ibits, proof, 3), 0);
-    1
-  }
 ⟧
 
 end MultiStark
diff --git a/Ix/MultiStark/Tests.lean b/Ix/MultiStark/Tests.lean
new file mode 100644
index 00000000..80148123
--- /dev/null
+++ b/Ix/MultiStark/Tests.lean
@@ -0,0 +1,294 @@
+module
+public import Ix.Aiur.Meta
+
+/-!
+# Self-tests for the Multi-STARK recursive verifier
+
+The `pub fn *_test` entrypoints validating the verifier's primitives against
+Rust reference values (`multi-stark/src/types.rs` test outputs). They live in a
+SEPARATE toplevel fragment, merged on top of the production verifier toplevel
+(`MultiStark.multiStarkTests` in `Ix/MultiStark.lean`): every `pub fn` adds a
+circuit to the compiled system, so keeping the tests out of
+`MultiStark.multiStark` keeps the production verifier's width free of
+test-only circuits.
+
+Cross-fragment references (e.g. `gl_add`, `mmcs_hash_row`, `fri_fold2`,
+`ch_sample_bits`) resolve after the merge — the merged toplevel has a flat
+namespace.
+-/
+
+public section
+
+namespace MultiStark
+
+def tests := ⟦
+  -- ==========================================================================
+  -- Non-native Goldilocks byte arithmetic (vs `gl_ops_ref`).
+  -- ==========================================================================
+  fn assert_g8(x: Goldilocks, e: Goldilocks) -> G {
+    assert_eq!(to_field(x[0]), to_field(e[0]));
+    assert_eq!(to_field(x[1]), to_field(e[1]));
+    assert_eq!(to_field(x[2]), to_field(e[2]));
+    assert_eq!(to_field(x[3]), to_field(e[3]));
+    assert_eq!(to_field(x[4]), to_field(e[4]));
+    assert_eq!(to_field(x[5]), to_field(e[5]));
+    assert_eq!(to_field(x[6]), to_field(e[6]));
+    assert_eq!(to_field(x[7]), to_field(e[7]));
+    1
+  }
+  pub fn gl_addsub_test() -> G {
+    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
+    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
+    assert_eq!(assert_g8(gl_add(a, b), [255u8, 16u8, 17u8, 17u8, 18u8, 17u8, 17u8, 17u8]), 1);
+    assert_eq!(assert_g8(gl_sub(a, b), [32u8, 83u8, 151u8, 219u8, 31u8, 100u8, 168u8, 236u8]), 1);
+    assert_eq!(assert_g8(gl_sub(b, a), [225u8, 172u8, 104u8, 36u8, 223u8, 155u8, 87u8, 19u8]), 1);
+    -- edge: (p-1) + 5 ≡ 4 ; 5 - (p-1) ≡ 6
+    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8]; -- 0xFFFFFFFF00000000
+    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    assert_eq!(assert_g8(gl_add(pm1, five), [4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
+    assert_eq!(assert_g8(gl_sub(five, pm1), [6u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1);
+    1
+  }
+
+  fn assert_eg(x: ExtGoldilocks, e0: Goldilocks, e1: Goldilocks) -> G {
+    assert_eq!(assert_g8(x[0], e0), 1);
+    assert_eq!(assert_g8(x[1], e1), 1);
+    1
+  }
+  pub fn gl_muldiv_test() -> G {
+    let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210
+    let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0
+    assert_eq!(assert_g8(gl_mul(a, b), [212u8, 186u8, 123u8, 108u8, 31u8, 253u8, 234u8, 250u8]), 1);
+    assert_eq!(assert_g8(gl_inverse(a), [97u8, 29u8, 109u8, 46u8, 183u8, 100u8, 8u8, 102u8]), 1);
+    assert_eq!(assert_g8(gl_div(a, b), [63u8, 59u8, 61u8, 54u8, 46u8, 255u8, 29u8, 186u8]), 1);
+    -- edge: (p-1)·5 ≡ p-5
+    let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8];
+    let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    assert_eq!(assert_g8(gl_mul(pm1, five), [252u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8]), 1);
+    -- a·a⁻¹ = 1 and b·b⁻¹ = 1
+    assert_eq!(assert_g8(gl_mul(a, gl_inverse(a)), gl_one()), 1);
+    assert_eq!(assert_g8(gl_mul(b, gl_inverse(b)), gl_one()), 1);
+    1
+  }
+  pub fn eg_ops_test() -> G {
+    -- e0 = (0xFEDCBA9876543210, 0x0123456789ABCDEF), e1 = (0x1111111122222222, 0x3333333344444444)
+    let e0 = [[16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8],
+              [239u8, 205u8, 171u8, 137u8, 103u8, 69u8, 35u8, 1u8]];
+    let e1 = [[34u8, 34u8, 34u8, 34u8, 17u8, 17u8, 17u8, 17u8],
+              [68u8, 68u8, 68u8, 68u8, 51u8, 51u8, 51u8, 51u8]];
+    assert_eq!(assert_eg(eg_add(e0, e1),
+      [49u8, 84u8, 118u8, 152u8, 170u8, 203u8, 237u8, 15u8],
+      [51u8, 18u8, 240u8, 205u8, 154u8, 120u8, 86u8, 52u8]), 1);
+    assert_eq!(assert_eg(eg_mul(e0, e1),
+      [10u8, 238u8, 162u8, 36u8, 224u8, 127u8, 182u8, 134u8],
+      [215u8, 234u8, 152u8, 224u8, 219u8, 254u8, 32u8, 67u8]), 1);
+    assert_eq!(assert_eg(eg_inverse(e0),
+      [221u8, 238u8, 29u8, 131u8, 179u8, 89u8, 214u8, 216u8],
+      [114u8, 99u8, 206u8, 108u8, 15u8, 88u8, 161u8, 246u8]), 1);
+    assert_eq!(assert_eg(eg_div(e0, e1),
+      [42u8, 59u8, 64u8, 77u8, 226u8, 214u8, 95u8, 63u8],
+      [200u8, 46u8, 148u8, 147u8, 124u8, 180u8, 248u8, 140u8]), 1);
+    -- e0 · e0⁻¹ = 1
+    assert_eq!(assert_eg(eg_mul(e0, eg_inverse(e0)), gl_one(), gl_zero()), 1);
+    1
+  }
+
+  -- ==========================================================================
+  -- Challenger (vs `pcs_challenger_ref` / `pcs_challenger4_ref`).
+  -- ==========================================================================
+
+  -- Self-test: `sample_bits(20)` after observing the single `Val`
+  -- `0x0102030405060708` (8 LE bytes) must equal the reference `799146`
+  -- (`pcs_challenger_ref` in `multi-stark/src/types.rs`).
+  pub fn sample_bits_test() -> G {
+    let input = store(ListNode.Cons(8u8, store(ListNode.Cons(7u8, store(ListNode.Cons(6u8,
+      store(ListNode.Cons(5u8, store(ListNode.Cons(4u8, store(ListNode.Cons(3u8,
+      store(ListNode.Cons(2u8, store(ListNode.Cons(1u8, store(ListNode.Nil)))))))))))))))));
+    let (bits, _i, _o) = ch_sample_bits(input, store(ListNode.Nil), 20);
+    assert_eq!(bits_to_num(bits), 799146);
+    1
+  }
+
+  -- Self-test: replay the synthetic PCS challenger sequence from
+  -- `pcs_challenger4_ref` (multi-stark/src/types.rs) and check every sampled
+  -- challenge against the reference. Decisively exercises the two consecutive
+  -- ext samples (α_pcs then α_fri) sharing one hash `output` stream.
+  pub fn pcs_challenger4_test() -> G {
+    -- post-ζ input buffer = observe V0 then V1 (forward / observation order).
+    let v0 = [8u8, 7u8, 6u8, 5u8, 4u8, 3u8, 2u8, 1u8];     -- 0x0102030405060708
+    let v1 = [136u8, 119u8, 102u8, 85u8, 68u8, 51u8, 34u8, 17u8]; -- 0x1122334455667788
+    let input = snoc_b8(snoc_b8(store(ListNode.Nil), v0), v1);
+    -- α_pcs (output empty ⇒ flush), then α_fri (CONSECUTIVE ⇒ thread output).
+    let (apcs, input, o1) = pcs_sample_ext(input, store(ListNode.Nil));
+    let (afri, input, o2) = pcs_sample_ext(input, o1);
+    assert_eq!(limb_to_field(apcs[0]), 2882912772410685996);
+    assert_eq!(limb_to_field(apcs[1]), 910933442133595775);
+    assert_eq!(limb_to_field(afri[0]), 14440140149289897216);
+    assert_eq!(limb_to_field(afri[1]), 8092267645441512944);
+    -- observe commit (clears output), sample β.
+    let v2 = [239u8, 190u8, 173u8, 222u8, 0u8, 0u8, 0u8, 0u8]; -- 0x00000000deadbeef
+    let (input, _oc) = ch_observe_val(input, v2);
+    let (beta, input, _ob) = pcs_sample_ext(input, store(ListNode.Nil));
+    assert_eq!(limb_to_field(beta[0]), 10456048119516576995);
+    assert_eq!(limb_to_field(beta[1]), 3173538015651228593);
+    -- observe final_poly coeff + log_arity (each a Val), then sample the index.
+    let v3 = [4u8, 3u8, 2u8, 1u8, 13u8, 12u8, 11u8, 10u8]; -- 0x0a0b0c0d01020304
+    let v4 = [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];     -- 0x0000000000000002
+    let (input, _o3) = ch_observe_val(input, v3);
+    let (input, _o4) = ch_observe_val(input, v4);
+    let (bits, _bi, _bo) = ch_sample_bits(input, store(ListNode.Nil), 20);
+    assert_eq!(bits_to_num(bits), 336138);
+    1
+  }
+
+  -- ==========================================================================
+  -- FRI fold + reduced openings (vs `fri_fold_ref` / `ro_fold_ref`).
+  -- ==========================================================================
+
+  -- Self-test: arity-2 fold at index 5, log_height 3 vs the `fri_fold_ref`
+  -- reference (computed by the real `TwoAdicFriFolding::fold_row`).
+  pub fn fri_fold_test() -> G {
+    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
+                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let e0 = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
+              [34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8]];
+    let e1 = [[51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8],
+              [68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8]];
+    let beta = [[85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8],
+                [102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8]];
+    let folded = fri_fold2(index_bits, 3, beta, e0, e1);
+    assert_eq!(limb_to_field(folded[0]), 9349172584842537206);
+    assert_eq!(limb_to_field(folded[1]), 984486879173118962);
+    1
+  }
+
+  -- Self-test vs `ro_fold_ref`: x at index 5 / log_height 3, then accumulate
+  -- (p_z − p_x)/(z − x) over 3 columns with alpha powers.
+  pub fn ro_fold_test() -> G {
+    let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0,
+                       store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let x = ro_x(index_bits, 3);
+    assert_eq!(limb_to_field(x), 117440512);
+    let z = [[154u8, 120u8, 86u8, 52u8, 18u8, 0u8, 0u8, 0u8],
+             [1u8, 239u8, 205u8, 171u8, 0u8, 0u8, 0u8, 0u8]];
+    let alpha = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8],
+                 [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let px0 = [11u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let px1 = [22u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let px2 = [33u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
+    let p_x = store(ListNode.Cons(px0, store(ListNode.Cons(px1,
+                store(ListNode.Cons(px2, store(ListNode.Nil)))))));
+    let pz0 = [[100u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let pz1 = [[200u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let pz2 = [[44u8, 1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]];
+    let p_z = store(ListNode.Cons(pz0, store(ListNode.Cons(pz1,
+                store(ListNode.Cons(pz2, store(ListNode.Nil)))))));
+    let q = eg_inverse(eg_sub(z, [x, gl_zero()]));
+    let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [gl_zero(), gl_zero()], [gl_one(), gl_zero()]);
+    assert_eq!(limb_to_field(ro[0]), 7130765474285082575);
+    assert_eq!(limb_to_field(ro[1]), 12254464995725315436);
+    1
+  }
+
+  -- ==========================================================================
+  -- Keccak MMCS sponge/compression + Merkle verify_batch
+  -- (vs `pcs_ref_values` / `pcs_merkle_ref`). Compares each output lane mod p
+  -- via `limb_to_field` (all reference lanes are canonical, so this is exact).
+  -- ==========================================================================
+
+  -- A small single-byte value as a `U64` (8 LE bytes).
+  fn u64_of(b: U8) -> U64 {
+    [b, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]
+  }
+  -- `[i+1, i+2, …, n]` as a `List‹U64›`.
+  fn build_range(i: G, n: G) -> List‹U64› {
+    match n - i {
+      0 => store(ListNode.Nil),
+      _ => store(ListNode.Cons(u64_of(u8_from_field_unsafe(i + 1)), build_range(i + 1, n))),
+    }
+  }
+  fn assert_digest(d: Digest, e0: G, e1: G, e2: G, e3: G) -> G {
+    assert_eq!(limb_to_field(d[0]), e0);
+    assert_eq!(limb_to_field(d[1]), e1);
+    assert_eq!(limb_to_field(d[2]), e2);
+    assert_eq!(limb_to_field(d[3]), e3);
+    1
+  }
+  pub fn pcs_hash_test() -> G {
+    -- LEAF3: hash([1,2,3])
+    let d3 = mmcs_hash_row(build_range(0, 3));
+    assert_eq!(assert_digest(d3, 0xc55a6a1beaea9fec, 0xc8f0dbc4c59ec440,
+                                 0xacb1295de9bfe032, 0x445d569d3dfc9543), 1);
+    -- LEAF17: exactly one full block, no extra permute.
+    let d17 = mmcs_hash_row(build_range(0, 17));
+    assert_eq!(assert_digest(d17, 0x388da73622e8fdd5, 0xec687be9c50d2218,
+                                  0x528d145dfe6571af, 0xd2eb808dfba4703c), 1);
+    -- LEAF22: full block + 5-element partial (two permutes), >20 lanes.
+    let d22 = mmcs_hash_row(build_range(0, 22));
+    assert_eq!(assert_digest(d22, 520358013996801752, 12301199992631688477,
+      8732686820159480415, 10883226686987971725), 1);
+    -- LEAF20: full block + 3-element partial (two permutes).
+    let d20 = mmcs_hash_row(build_range(0, 20));
+    assert_eq!(assert_digest(d20, 0xec696847be88d358, 0x202861c67ff4cec8,
+                                  0x88e006a48aaa0661, 0xabaddb9d32ecd024), 1);
+    -- COMPRESS([1,2,3,4],[5,6,7,8])
+    let c = mmcs_compress([u64_of(1u8), u64_of(2u8), u64_of(3u8), u64_of(4u8)],
+                          [u64_of(5u8), u64_of(6u8), u64_of(7u8), u64_of(8u8)]);
+    assert_eq!(assert_digest(c, 0xda1ef0642722b22e, 0x4851efdbdb2a2fd8,
+                                0x37e8ff900ea95d47, 0xa153eee7805376fb), 1);
+    1
+  }
+
+  -- Merkle `verify_batch` self-test against the `pcs_merkle_ref` reference: a
+  -- cap-height-0 tree over 3 matrices of heights 8/4/2 (log-heights 3/2/1),
+  -- opened at index 5 (path bits 1,0,1). Checks the recomputed root matches the
+  -- committed root and that the cap index is 0, then that a tampered opened row
+  -- yields a different (rejected) root.
+  pub fn pcs_merkle_test() -> G {
+    -- opened rows (matrix order): m0 row5, m1 row2, m2 row1.
+    let row0 = store(ListNode.Cons(u64_of(11u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
+    let row1 = store(ListNode.Cons(u64_of(107u8), store(ListNode.Cons(u64_of(108u8),
+                 store(ListNode.Cons(u64_of(109u8), store(ListNode.Nil)))))));
+    let row2 = store(ListNode.Cons(u64_of(202u8), store(ListNode.Nil)));
+    let rows = store(ListNode.Cons(row0, store(ListNode.Cons(row1,
+                 store(ListNode.Cons(row2, store(ListNode.Nil)))))));
+    -- log-heights and path bits (index 5 = 0b101, LSB first).
+    let lhs = store(ListNode.Cons(3, store(ListNode.Cons(2, store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    let ibits = store(ListNode.Cons(1, store(ListNode.Cons(0, store(ListNode.Cons(1, store(ListNode.Nil)))))));
+    -- authentication path SIB0, SIB1, SIB2 (each a Digest = [U64; 4]).
+    let sib0 = [[9u8, 36u8, 179u8, 127u8, 205u8, 83u8, 105u8, 203u8],
+                [95u8, 229u8, 105u8, 223u8, 113u8, 55u8, 97u8, 122u8],
+                [135u8, 8u8, 65u8, 248u8, 163u8, 163u8, 68u8, 81u8],
+                [9u8, 11u8, 20u8, 209u8, 10u8, 168u8, 151u8, 125u8]];
+    let sib1 = [[227u8, 58u8, 255u8, 213u8, 77u8, 152u8, 42u8, 77u8],
+                [113u8, 86u8, 2u8, 151u8, 97u8, 63u8, 58u8, 45u8],
+                [228u8, 139u8, 228u8, 194u8, 182u8, 115u8, 107u8, 221u8],
+                [248u8, 16u8, 30u8, 93u8, 176u8, 36u8, 205u8, 88u8]];
+    let sib2 = [[236u8, 144u8, 115u8, 218u8, 140u8, 5u8, 86u8, 229u8],
+                [95u8, 186u8, 252u8, 175u8, 21u8, 247u8, 153u8, 25u8],
+                [113u8, 78u8, 92u8, 200u8, 212u8, 175u8, 247u8, 47u8],
+                [78u8, 145u8, 206u8, 54u8, 175u8, 155u8, 165u8, 206u8]];
+    let proof = store(ListNode.Cons(sib0, store(ListNode.Cons(sib1,
+                  store(ListNode.Cons(sib2, store(ListNode.Nil)))))));
+    let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, 3);
+    assert_eq!(capidx, 0);
+    assert_eq!(assert_digest(root, 0x6211b9a1a116a006, 0x435ee98e1504880f,
+                                   0x900c7274b9a215f, 0xf6e3aaac5dcd90bd), 1);
+    -- tamper: perturb m0's first opened value → root must change.
+    let bad0 = store(ListNode.Cons(u64_of(99u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil)))));
+    let bad_rows = store(ListNode.Cons(bad0, store(ListNode.Cons(row1,
+                     store(ListNode.Cons(row2, store(ListNode.Nil)))))));
+    let cap = store(ListNode.Cons([[6u8, 160u8, 22u8, 161u8, 161u8, 185u8, 17u8, 98u8],
+                                   [15u8, 136u8, 4u8, 21u8, 142u8, 233u8, 94u8, 67u8],
+                                   [95u8, 33u8, 154u8, 75u8, 39u8, 199u8, 0u8, 9u8],
+                                   [189u8, 144u8, 205u8, 93u8, 172u8, 170u8, 227u8, 246u8]],
+                    store(ListNode.Nil)));
+    assert_eq!(mmcs_verify(cap, rows, lhs, ibits, proof, 3), 1);
+    assert_eq!(mmcs_verify(cap, bad_rows, lhs, ibits, proof, 3), 0);
+    1
+  }
+⟧
+
+end MultiStark
+
+end
diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean
index af4aeac0..e3207e31 100644
--- a/Ix/MultiStark/Verifier.lean
+++ b/Ix/MultiStark/Verifier.lean
@@ -210,18 +210,6 @@ def verifier := ⟦
     (take_bits(sample8_bits(bytes), n), i1, o1)
   }
 
-  -- Self-test: `sample_bits(20)` after observing the single `Val`
-  -- `0x0102030405060708` (8 LE bytes) must equal the reference `799146`
-  -- (`pcs_challenger_ref` in `multi-stark/src/types.rs`).
-  pub fn sample_bits_test() -> G {
-    let input = store(ListNode.Cons(8u8, store(ListNode.Cons(7u8, store(ListNode.Cons(6u8,
-      store(ListNode.Cons(5u8, store(ListNode.Cons(4u8, store(ListNode.Cons(3u8,
-      store(ListNode.Cons(2u8, store(ListNode.Cons(1u8, store(ListNode.Nil)))))))))))))))));
-    let (bits, _i, _o) = ch_sample_bits(input, store(ListNode.Nil), 20);
-    assert_eq!(bits_to_num(bits), 799146);
-    1
-  }
-
   -- Append (observe) 8 little-endian bytes of `b` at the END of the challenger
   -- input buffer. The transcript is held front-to-back (front = first observed =
   -- first hashed, matching `keccak256`'s absorption order), so an observation
@@ -260,38 +248,6 @@ def verifier := ⟦
     ([gl_reduce(c0), gl_reduce(c1)], i1, o1)
   }
 
-  -- Self-test: replay the synthetic PCS challenger sequence from
-  -- `pcs_challenger4_ref` (multi-stark/src/types.rs) and check every sampled
-  -- challenge against the reference. Decisively exercises the two consecutive
-  -- ext samples (α_pcs then α_fri) sharing one hash `output` stream.
-  pub fn pcs_challenger4_test() -> G {
-    -- post-ζ input buffer = observe V0 then V1 (forward / observation order).
-    let v0 = [8u8, 7u8, 6u8, 5u8, 4u8, 3u8, 2u8, 1u8];     -- 0x0102030405060708
-    let v1 = [136u8, 119u8, 102u8, 85u8, 68u8, 51u8, 34u8, 17u8]; -- 0x1122334455667788
-    let input = snoc_b8(snoc_b8(store(ListNode.Nil), v0), v1);
-    -- α_pcs (output empty ⇒ flush), then α_fri (CONSECUTIVE ⇒ thread output).
-    let (apcs, input, o1) = pcs_sample_ext(input, store(ListNode.Nil));
-    let (afri, input, o2) = pcs_sample_ext(input, o1);
-    assert_eq!(limb_to_field(apcs[0]), 2882912772410685996);
-    assert_eq!(limb_to_field(apcs[1]), 910933442133595775);
-    assert_eq!(limb_to_field(afri[0]), 14440140149289897216);
-    assert_eq!(limb_to_field(afri[1]), 8092267645441512944);
-    -- observe commit (clears output), sample β.
-    let v2 = [239u8, 190u8, 173u8, 222u8, 0u8, 0u8, 0u8, 0u8]; -- 0x00000000deadbeef
-    let (input, _oc) = ch_observe_val(input, v2);
-    let (beta, input, _ob) = pcs_sample_ext(input, store(ListNode.Nil));
-    assert_eq!(limb_to_field(beta[0]), 10456048119516576995);
-    assert_eq!(limb_to_field(beta[1]), 3173538015651228593);
-    -- observe final_poly coeff + log_arity (each a Val), then sample the index.
-    let v3 = [4u8, 3u8, 2u8, 1u8, 13u8, 12u8, 11u8, 10u8]; -- 0x0a0b0c0d01020304
-    let v4 = [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];     -- 0x0000000000000002
-    let (input, _o3) = ch_observe_val(input, v3);
-    let (input, _o4) = ch_observe_val(input, v4);
-    let (bits, _bi, _bo) = ch_sample_bits(input, store(ListNode.Nil), 20);
-    assert_eq!(bits_to_num(bits), 336138);
-    1
-  }
-
   -- Append a claim's values (each `Val` as 8 LE bytes) onto `tail`, in order.
   fn claim_vals_onto(vals: List‹U64›, tail: ByteStream) -> ByteStream {
     match load(vals) {
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 39a1a3ec..3b1c5d70 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -129,39 +129,34 @@ def main : IO UInt32 := do
     (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
 
   -- ── 4. recursive verifier system ─────────────────────────────────────────
+  -- The PRODUCTION toplevel (no test circuits — every `pub fn` adds width).
   let vTop ← match MultiStark.multiStark with
     | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1
     | .ok t => pure t
   let (vCompiled, _vSystem) ← buildSystem "verifier" vTop
-  -- TEMP: validate the PCS keccak-MMCS sponge primitives + Merkle verify_batch.
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_hash_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ pcs_hash_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ pcs_hash_test: keccak MMCS sponge/compress match reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_merkle_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ pcs_merkle_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ pcs_merkle_test: Merkle verify_batch matches reference (root + tamper)"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `sample_bits_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ sample_bits_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ sample_bits_test: challenger sample_bits matches reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `pcs_challenger4_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ pcs_challenger4_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ pcs_challenger4_test: PCS challenger continuation (α_pcs/α_fri/β/index) matches reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `fri_fold_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ fri_fold_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ fri_fold_test: FRI arity-2 fold_row matches reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `ro_fold_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ ro_fold_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ ro_fold_test: open_input reduced-opening math matches reference"
-  -- TEMP: validate non-native Goldilocks byte-arithmetic (vs `gl_ops_ref`).
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `gl_addsub_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ gl_addsub_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ gl_addsub_test: non-native Goldilocks add/sub match reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `gl_muldiv_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ gl_muldiv_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ gl_muldiv_test: non-native Goldilocks mul/inverse/div match reference"
-  match vCompiled.bytecode.execute (vCompiled.getFuncIdx `eg_ops_test).get! #[] default with
-  | .error e => IO.eprintln s!"✗ eg_ops_test FAILED — {e}"; return 1
-  | .ok _ => IO.println "✓ eg_ops_test: non-native ExtGoldilocks add/mul/inverse/div match reference"
+  -- The TESTS toplevel: the verifier plus the `*_test` entrypoints. Compiled
+  -- separately so the test circuits never widen the production system; only the
+  -- bytecode is needed (tests are executed, never proven).
+  let tTop ← match MultiStark.multiStarkTests with
+    | .error e => IO.eprintln s!"tests toplevel merge failed: {e}"; return 1
+    | .ok t => pure t
+  let tCompiled ← match tTop.compile with
+    | .error e => throw <| IO.userError s!"verifier-tests: compilation failed: {e}"
+    | .ok c => pure c
+  let runTest (name : Lean.Name) (okMsg : String) : IO Bool := do
+    match tCompiled.bytecode.execute (tCompiled.getFuncIdx name).get! #[] default with
+    | .error e => IO.eprintln s!"✗ {name} FAILED — {e}"; pure false
+    | .ok _ => IO.println s!"✓ {okMsg}"; pure true
+  -- Validate the verifier's primitives against the Rust reference values.
+  if !(← runTest `pcs_hash_test "pcs_hash_test: keccak MMCS sponge/compress match reference") then return 1
+  if !(← runTest `pcs_merkle_test "pcs_merkle_test: Merkle verify_batch matches reference (root + tamper)") then return 1
+  if !(← runTest `sample_bits_test "sample_bits_test: challenger sample_bits matches reference") then return 1
+  if !(← runTest `pcs_challenger4_test "pcs_challenger4_test: PCS challenger continuation (α_pcs/α_fri/β/index) matches reference") then return 1
+  if !(← runTest `fri_fold_test "fri_fold_test: FRI arity-2 fold_row matches reference") then return 1
+  if !(← runTest `ro_fold_test "ro_fold_test: open_input reduced-opening math matches reference") then return 1
+  if !(← runTest `gl_addsub_test "gl_addsub_test: non-native Goldilocks add/sub match reference") then return 1
+  if !(← runTest `gl_muldiv_test "gl_muldiv_test: non-native Goldilocks mul/inverse/div match reference") then return 1
+  if !(← runTest `eg_ops_test "eg_ops_test: non-native ExtGoldilocks add/mul/inverse/div match reference") then return 1
   let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with
     | some i => pure i
     | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1

From 8b2b46158c9dfec38fd39aafb67151b47d1dbc88 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Thu, 11 Jun 2026 16:03:10 -0300
Subject: [PATCH 27/28] fixup

---
 Ix/MultiStark.lean     | 33 +++++++++++++++++----------------
 RecursiveVerifier.lean | 14 +++++++-------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean
index cf870e81..f30912ef 100644
--- a/Ix/MultiStark.lean
+++ b/Ix/MultiStark.lean
@@ -16,7 +16,7 @@ public import Ix.MultiStark.Tests
 The recursive verifier. Its public statement is purely existential: *"there
 exists a valid multi-stark proof, under the FRI parameters given as public
 input, for the constraint system with this keccak-256 digest and these public
-claims."* The proof itself is **non-deterministic advice** (fed on IO key `[0]`,
+claims."* The proof itself is **non-deterministic advice** (fed on IO channel 0,
 never hashed or otherwise bound as a public input): the Fiat-Shamir transcript
 replay plus the Merkle/OOD/FRI checks are exactly what make any accepted advice
 a valid proof — a hash binding of the proof bytes would add nothing to the
@@ -37,27 +37,28 @@ namespace MultiStark
 def entrypoints := ⟦
   -- Public inputs: the keccak-256 digests of the verifying key and the claims
   -- (4 little-endian u64 lanes each) plus the variable FRI parameters. The
-  -- proof is pure non-deterministic advice on IO key `[0]` — see the module
-  -- docstring.
+  -- proof is pure non-deterministic advice on IO channel 0 — see the module
+  -- docstring. One stream per channel (0 = proof, 1 = vk, 2 = claims), each
+  -- registered under key `[0]` on its channel.
   pub fn verify_multi_stark_proof(system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4], num_queries: G, commit_pow_bits: G, log_blowup: G) {
-    -- Proof advice from IO key [0]: deserialize, assert fully consumed.
-    let (idx, len) = io_get_info([0]);
-    let bytes = #read_byte_stream(idx, len);
+    -- Proof advice from IO channel 0: deserialize, assert fully consumed.
+    let (idx, len) = io_get_info(0, [0]);
+    let bytes = #read_byte_stream(0, idx, len);
     let (proof, rest) = read_proof(bytes);
     assert_eq!(load(rest), ListNode.Nil);
-    -- Verifying key (`System<AiurCircuit>`) from IO key [1]: bind the bytes to
-    -- the public keccak-256 `system_digest`, then reconstruct the system.
-    let (sidx, slen) = io_get_info([1]);
-    let sbytes = #read_byte_stream(sidx, slen);
+    -- Verifying key (`System<AiurCircuit>`) from IO channel 1: bind the bytes
+    -- to the public keccak-256 `system_digest`, then reconstruct the system.
+    let (sidx, slen) = io_get_info(1, [0]);
+    let sbytes = #read_byte_stream(1, sidx, slen);
     assert_eq!(keccak256(sbytes), system_digest);
     let (sys, srest) = read_system(sbytes);
     assert_eq!(load(srest), ListNode.Nil);
-    -- Public claims (`&[&[Val]]`) from IO key [2]: bind the bytes to the public
-    -- keccak-256 `claims_digest`, then deserialize. Binding them as a public
-    -- input is what makes the lookup argument sound (a prover cannot choose
-    -- claims adaptively).
-    let (cidx, clen) = io_get_info([2]);
-    let cbytes = #read_byte_stream(cidx, clen);
+    -- Public claims (`&[&[Val]]`) from IO channel 2: bind the bytes to the
+    -- public keccak-256 `claims_digest`, then deserialize. Binding them as a
+    -- public input is what makes the lookup argument sound (a prover cannot
+    -- choose claims adaptively).
+    let (cidx, clen) = io_get_info(2, [0]);
+    let cbytes = #read_byte_stream(2, cidx, clen);
     assert_eq!(keccak256(cbytes), claims_digest);
     let (claims, crest) = read_claims(cbytes);
     assert_eq!(load(crest), ListNode.Nil);
diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean
index 3b1c5d70..b55d93ac 100644
--- a/RecursiveVerifier.lean
+++ b/RecursiveVerifier.lean
@@ -14,9 +14,9 @@ A standalone executable that exercises the Multi-STARK verifier scaffold
 
 1. Define a tiny Aiur toplevel with a `factorial` entrypoint.
 2. Prove `factorial(5) = 120` with the Multi-STARK backend.
-3. Serialize that proof (`Proof.toBytes`, bincode) and seed it on the IO
-   channel under key `[0]` — the hint the recursive verifier reads
-   non-deterministically via `#read_byte_stream`.
+3. Serialize that proof (`Proof.toBytes`, bincode) and seed it on IO
+   channel 0 (key `[0]`; vk on channel 1, claims on channel 2) — the hint the
+   recursive verifier reads non-deterministically via `#read_byte_stream`.
 4. Run the `verify_multi_stark_proof` entrypoint over that IO buffer and
    prove *its* execution, producing a recursive proof.
 
@@ -126,7 +126,7 @@ def main : IO UInt32 := do
       Aiur.G.ofNat recCommitParams.logBlowup]
   let input : Array Aiur.G := sysDigestInput ++ claimsDigestInput ++ friParamInput
   let verifierIO : IOBuffer :=
-    (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
+    (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] proofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend 2 #[Aiur.G.ofNat 0] claimGs
 
   -- ── 4. recursive verifier system ─────────────────────────────────────────
   -- The PRODUCTION toplevel (no test circuits — every `pub fn` adds width).
@@ -175,7 +175,7 @@ def main : IO UInt32 := do
     --        replayed transcript from the one the proof was generated under.
     let badProofGs := proofGs.set! 0 (Aiur.G.ofNat ((proofBytes.data[0]!.toNat + 1) % 256))
     let badProofIO : IOBuffer :=
-      (((default : IOBuffer).extend #[Aiur.G.ofNat 0] badProofGs).extend #[Aiur.G.ofNat 1] vkGs).extend #[Aiur.G.ofNat 2] claimGs
+      (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] badProofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend 2 #[Aiur.G.ofNat 0] claimGs
     match vCompiled.bytecode.execute vIdx input badProofIO with
     | .error _ => IO.println "✓ tampered proof advice correctly rejected (verification checks)"
     | .ok _ => IO.eprintln "✗ tampered proof advice was NOT rejected"; return 1
@@ -190,8 +190,8 @@ def main : IO UInt32 := do
     let badClaimInput : Array Aiur.G :=
       sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8) ++ friParamInput
     let badClaimIO : IOBuffer :=
-      (((default : IOBuffer).extend #[Aiur.G.ofNat 0] proofGs).extend #[Aiur.G.ofNat 1] vkGs).extend
-        #[Aiur.G.ofNat 2] (badClaimBytes.data.map .ofUInt8)
+      (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] proofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend
+        2 #[Aiur.G.ofNat 0] (badClaimBytes.data.map .ofUInt8)
     match vCompiled.bytecode.execute vIdx badClaimInput badClaimIO with
     | .error _ => IO.println "✓ tampered claim correctly rejected (OOD/accumulator mismatch)"
     | .ok _ => IO.eprintln "✗ tampered claim was NOT rejected"; return 1

From 869af4dffefd85aeca1dafcb4492816f18ee8cf6 Mon Sep 17 00:00:00 2001
From: Gabriel Barreto <gabriel.aquino.barreto@gmail.com>
Date: Thu, 11 Jun 2026 16:19:42 -0300
Subject: [PATCH 28/28] clippy and fmt

---
 src/aiur/vk_codec.rs | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/aiur/vk_codec.rs b/src/aiur/vk_codec.rs
index e719c47a..373ca6e7 100644
--- a/src/aiur/vk_codec.rs
+++ b/src/aiur/vk_codec.rs
@@ -32,7 +32,9 @@ fn serde_config() -> Configuration<LittleEndian, Fixint> {
 
 /// Serialize the verifying key `System<AiurCircuit>` (preprocessed traces are
 /// skipped — see the module docs).
-pub(crate) fn to_bytes(system: &System<AiurCircuit>) -> Result<Vec<u8>, String> {
+pub(crate) fn to_bytes(
+  system: &System<AiurCircuit>,
+) -> Result<Vec<u8>, String> {
   encode_to_vec(system, serde_config()).map_err(|e| e.to_string())
 }
 
@@ -42,13 +44,18 @@ pub(crate) fn from_bytes(bytes: &[u8]) -> Result<System<AiurCircuit>, String> {
   let (system, consumed) =
     decode_from_slice(bytes, serde_config()).map_err(|e| e.to_string())?;
   if consumed != bytes.len() {
-    return Err(format!("trailing data: consumed {consumed} of {}", bytes.len()));
+    return Err(format!(
+      "trailing data: consumed {consumed} of {}",
+      bytes.len()
+    ));
   }
   Ok(system)
 }
 
 /// Convenience: serialize the verifying key of a built [`AiurSystem`].
-pub(crate) fn aiur_system_to_bytes(sys: &AiurSystem) -> Result<Vec<u8>, String> {
+pub(crate) fn aiur_system_to_bytes(
+  sys: &AiurSystem,
+) -> Result<Vec<u8>, String> {
   to_bytes(&sys.system)
 }
 
@@ -137,7 +144,7 @@ impl<'a> R<'a> {
     Ok(u64::from_le_bytes(self.take(8)?.try_into().unwrap()))
   }
   fn usize(&mut self) -> Result<usize, String> {
-    Ok(self.u64()? as usize)
+    usize::try_from(self.u64()?).map_err(|e| format!("usize overflow: {e}"))
   }
   fn g(&mut self) -> Result<Val, String> {
     Ok(Val::from_u64(self.u64()?))
@@ -230,8 +237,7 @@ impl<'a> R<'a> {
     })
   }
   fn commitment(&mut self) -> Result<Commitment, String> {
-    let caps =
-      self.vec(|r| Ok([r.u64()?, r.u64()?, r.u64()?, r.u64()?]))?;
+    let caps = self.vec(|r| Ok([r.u64()?, r.u64()?, r.u64()?, r.u64()?]))?;
     Ok(Commitment::from(caps))
   }
   fn option<T>(
@@ -252,15 +258,17 @@ pub(crate) fn manual_deserialize(
   bytes: &[u8],
 ) -> Result<System<AiurCircuit>, String> {
   let mut r = R { buf: bytes, pos: 0 };
-  let commitment_parameters = CommitmentParameters {
-    log_blowup: r.usize()?,
-    cap_height: r.usize()?,
-  };
+  let commitment_parameters =
+    CommitmentParameters { log_blowup: r.usize()?, cap_height: r.usize()? };
   let circuits = r.vec(R::circuit)?;
   let preprocessed_commit = r.option(R::commitment)?;
   let preprocessed_indices = r.vec(|r| r.option(R::usize))?;
   if r.pos != bytes.len() {
-    return Err(format!("trailing data: consumed {} of {}", r.pos, bytes.len()));
+    return Err(format!(
+      "trailing data: consumed {} of {}",
+      r.pos,
+      bytes.len()
+    ));
   }
   Ok(System {
     commitment_parameters,