diff --git a/Cargo.lock b/Cargo.lock index 7de24578..b1df7dd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1954,7 +1954,6 @@ dependencies = [ [[package]] name = "multi-stark" version = "0.1.0" -source = "git+https://github.com/argumentcomputer/multi-stark.git?rev=9ecab51d553445c0cc7b571af00a76b8a83a6f8c#9ecab51d553445c0cc7b571af00a76b8a83a6f8c" dependencies = [ "bincode", "p3-air", diff --git a/Cargo.toml b/Cargo.toml index 58b90be3..8339ae85 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ itertools = "0.14.0" indexmap = { version = "2", features = ["rayon"] } lean-ffi = { git = "https://github.com/argumentcomputer/lean-ffi", rev = "cc98ebf67bf453ac3827cb767f78b13ea674dd6a" } mimalloc = { version = "0.1", default-features = false } -multi-stark = { git = "https://github.com/argumentcomputer/multi-stark.git", rev = "9ecab51d553445c0cc7b571af00a76b8a83a6f8c" } +multi-stark = { path = "multi-stark" } num-bigint = "0.4.6" rayon = "1" rustc-hash = "2" @@ -31,8 +31,8 @@ getrandom = { version = "0.3", optional = true } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "8ce04e3422cd48e68ef47fab95dba7d06b8c368c" } -bincode = { version = "2.0.1", optional = true } -serde = { version = "1.0.219", features = ["derive"], optional = true } +bincode = { version = "2.0.1" } +serde = { version = "1.0.219", features = ["derive"] } [dev-dependencies] quickcheck = "1.0.3" @@ -43,7 +43,7 @@ quickcheck_macros = "1.0.0" default = [] parallel = ["multi-stark/parallel"] test-ffi = [] -net = ["bytes", "tokio", "iroh", "iroh-base", "n0-error", "getrandom", "bincode", "serde" ] +net = ["bytes", "tokio", "iroh", "iroh-base", "n0-error", "getrandom" ] [profile.dev] panic = "abort" diff --git a/Ix/Aiur/Protocol.lean b/Ix/Aiur/Protocol.lean index df320811..7bfdd5df 100644 --- a/Ix/Aiur/Protocol.lean +++ b/Ix/Aiur/Protocol.lean @@ -47,6 +47,10 @@ namespace AiurSystem @[extern "rs_aiur_system_build"] opaque build : @&Bytecode.Toplevel → @&CommitmentParameters → AiurSystem +/-- Serialize the verifying key (`System`) to bytes. -/ +@[extern "rs_aiur_system_vk_bytes"] +opaque vkBytes : @& AiurSystem → ByteArray + @[extern "rs_aiur_system_prove"] private opaque prove' : @& AiurSystem → @& FriParameters → @& Bytecode.FunIdx → @& Array G → diff --git a/Ix/MultiStark.lean b/Ix/MultiStark.lean new file mode 100644 index 00000000..f30912ef --- /dev/null +++ b/Ix/MultiStark.lean @@ -0,0 +1,98 @@ +module +public import Ix.Aiur.Meta +public import Ix.IxVM.Core +public import Ix.IxVM.ByteStream +public import Ix.MultiStark.Goldilocks +public import Ix.MultiStark.Deserialize +public import Ix.MultiStark.Keccak +public import Ix.MultiStark.Pcs +public import Ix.MultiStark.SystemDeserialize +public import Ix.MultiStark.Verifier +public import Ix.MultiStark.Tests + +/-! +# Multi-STARK proof verifier (Aiur) + +The recursive verifier. Its public statement is purely existential: *"there +exists a valid multi-stark proof, under the FRI parameters given as public +input, for the constraint system with this keccak-256 digest and these public +claims."* The proof itself is **non-deterministic advice** (fed on IO channel 0, +never hashed or otherwise bound as a public input): the Fiat-Shamir transcript +replay plus the Merkle/OOD/FRI checks are exactly what make any accepted advice +a valid proof — a hash binding of the proof bytes would add nothing to the +statement, while costing one keccak-f per 136 bytes in-circuit. + +The verifying key and claims, by contrast, ARE digest-bound (`system_digest`, +`claims_digest`): they determine *what was proven*. + +Fixed protocol assumptions (our system): `queryProofOfWorkBits = 0`, +`capHeight = 0`, `maxLogArity = 1`, `logFinalPolyLen = 0`. The variable FRI +parameters (`num_queries`, `commit_pow_bits`, `log_blowup`) are public inputs. +-/ + +public section + +namespace MultiStark + +def entrypoints := ⟦ + -- Public inputs: the keccak-256 digests of the verifying key and the claims + -- (4 little-endian u64 lanes each) plus the variable FRI parameters. The + -- proof is pure non-deterministic advice on IO channel 0 — see the module + -- docstring. One stream per channel (0 = proof, 1 = vk, 2 = claims), each + -- registered under key `[0]` on its channel. + pub fn verify_multi_stark_proof(system_digest: [[U8; 8]; 4], claims_digest: [[U8; 8]; 4], num_queries: G, commit_pow_bits: G, log_blowup: G) { + -- Proof advice from IO channel 0: deserialize, assert fully consumed. + let (idx, len) = io_get_info(0, [0]); + let bytes = #read_byte_stream(0, idx, len); + let (proof, rest) = read_proof(bytes); + assert_eq!(load(rest), ListNode.Nil); + -- Verifying key (`System`) from IO channel 1: bind the bytes + -- to the public keccak-256 `system_digest`, then reconstruct the system. + let (sidx, slen) = io_get_info(1, [0]); + let sbytes = #read_byte_stream(1, sidx, slen); + assert_eq!(keccak256(sbytes), system_digest); + let (sys, srest) = read_system(sbytes); + assert_eq!(load(srest), ListNode.Nil); + -- Public claims (`&[&[Val]]`) from IO channel 2: bind the bytes to the + -- public keccak-256 `claims_digest`, then deserialize. Binding them as a + -- public input is what makes the lookup argument sound (a prover cannot + -- choose claims adaptively). + let (cidx, clen) = io_get_info(2, [0]); + let cbytes = #read_byte_stream(2, cidx, clen); + assert_eq!(keccak256(cbytes), claims_digest); + let (claims, crest) = read_claims(cbytes); + assert_eq!(load(crest), ListNode.Nil); + -- Structural + accumulator + PCS checks. + assert_eq!(verify(proof), 1); + -- Step 3 + 5: prover-faithful Fiat-Shamir replay and the out-of-domain + -- composition/quotient check, `composition(ζ)·inv_vanishing(ζ) == quotient(ζ)`. + assert_eq!(ood_verify(sys, proof, claims, num_queries, commit_pow_bits, log_blowup), 1); + () + } +⟧ + +/-- The standalone Multi-STARK verifier toplevel: `core` (lists/options) + +`byteStream` (`U64`, `flatten_u64`, `read_byte_stream`, …) + the deserializer, +the keccak-256 implementation, and the entrypoint. -/ +def multiStark : Except Aiur.Global Aiur.Source.Toplevel := do + let t ← IxVM.core.merge IxVM.byteStream + let t ← t.merge MultiStark.goldilocks + let t ← t.merge deserialize + let t ← t.merge keccak + let t ← t.merge systemDeserialize + let t ← t.merge pcs + let t ← t.merge verifier + t.merge entrypoints + +/-- The verifier toplevel PLUS its self-test entrypoints +(`Ix/MultiStark/Tests.lean`). Kept separate from `multiStark` because every +`pub fn` adds a circuit to the compiled system — the production verifier should +not carry test-only width. Use this toplevel only to run the `*_test` +entrypoints. -/ +def multiStarkTests : Except Aiur.Global Aiur.Source.Toplevel := do + let t ← multiStark + t.merge tests + +end MultiStark + +end diff --git a/Ix/MultiStark/Deserialize.lean b/Ix/MultiStark/Deserialize.lean new file mode 100644 index 00000000..d814a434 --- /dev/null +++ b/Ix/MultiStark/Deserialize.lean @@ -0,0 +1,406 @@ +module +public import Ix.Aiur.Meta +public import Ix.IxVM.Core +public import Ix.IxVM.ByteStream + +/-! +# Multi-STARK proof deserializer (Aiur) + +Aiur deserializer for `multi_stark::prover::Proof`, mirroring the hand-written +bincode reader in `multi-stark/src/manual_codec.rs` (`fn deserialize`). Only +the deserializer is ported (no serializer). + +The wire format is bincode `standard().with_little_endian().with_fixed_int_encoding()`: + +* every `Vec` / length prefix : `u64`, 8 bytes little-endian +* `Option` : 1 tag byte (`0` = None, `1` = Some), then value +* `u8` : 1 byte +* struct : fields in declaration order, no framing +* `[T; N]` / tuple : N elements back-to-back, no length prefix +* `Val` (Goldilocks) : raw `u64`, 8 bytes LE (NOT reduced mod p) +* `ExtVal` (deg-2 ext) : `[u64; 2]`, 16 bytes LE +* Merkle digest : `[u64; 4]`, 32 bytes LE + +`read_proof` builds a `Proof` object; `Ix/MultiStark.lean` hangs the entrypoint +(and eventual verification logic) off it. +-/ + +public section + +namespace MultiStark + +def deserialize := ⟦ + -- ========================================================================== + -- Wire-level type mirrors of `multi-stark/src/manual_codec.rs`. + -- `U64`/`ByteStream` come from `IxVM.byteStream`; raw Goldilocks `Val` is the + -- non-canonical `u64`, kept here as the 8 little-endian bytes (`U64`). + -- ========================================================================== + + -- `ExtVal = BinomialExtensionField = 𝔽_p[X]/(X² - 7)`, stored + -- as its two Goldilocks coefficients `[c0, c1]` (= `c0 + c1·X`). Each coefficient + -- is a NON-NATIVE Goldilocks element (`[U8; 8]`, canonical LE bytes) so that the + -- composition-polynomial arithmetic is field-agnostic — it emulates Goldilocks + -- on bytes instead of using Aiur's native (outer) field. See `Goldilocks.lean`. + type Ext = [[U8; 8]; 2] + + -- A Merkle digest: `[u64; DIGEST_ELEMS]` with `DIGEST_ELEMS = 4`. + type Digest = [U64; 4] + + -- `Commitment = MerkleCap<..>`: only the `cap: Vec` is on the wire. + type MerkleCap = List‹Digest› + + -- `OpenedValuesForRound = Vec>>`. + type OpenedRound = List‹List‹List‹Ext››› + + -- The three trace/quotient commitments at the head of a `Proof`. + enum Commitments { + Mk(MerkleCap, MerkleCap, MerkleCap) + } + + -- `BatchOpening`: per-round input opening of a FRI query. + -- opened_values : Vec> (one row of base-field values per matrix) + -- opening_proof : Vec (Merkle authentication path) + enum BatchOpening { + Mk(List‹List‹U64››, List‹Digest›) + } + + -- `CommitPhaseProofStep`: one folding step of a FRI query. + -- log_arity : u8 + -- sibling_values : Vec (arity - 1 siblings) + -- opening_proof : Vec + enum CommitPhaseProofStep { + Mk(U8, List‹Ext›, List‹Digest›) + } + + -- `QueryProof>>`. + enum QueryProof { + Mk(List‹BatchOpening›, List‹CommitPhaseProofStep›) + } + + -- `PcsProof = FriProof`. + -- commit_phase_commits : Vec + -- commit_pow_witnesses : Vec + -- query_proofs : Vec + -- final_poly : Vec + -- query_pow_witness : Val + enum FriProof { + Mk(List‹MerkleCap›, List‹U64›, List‹QueryProof›, List‹Ext›, U64) + } + + -- `Option` for `preprocessed_opened_values`. A dedicated + -- non-generic enum keeps constructor inference unambiguous. + enum PreprocessedOpt { + NoPreprocessed, + SomePreprocessed(OpenedRound) + } + + -- Mirror of `multi_stark::prover::Proof`, in serialization order. + enum Proof { + Mk( + Commitments, -- commitments + List‹Ext›, -- intermediate_accumulators + List‹U8›, -- log_degrees + FriProof, -- opening_proof + OpenedRound, -- quotient_opened_values + PreprocessedOpt, -- preprocessed_opened_values + OpenedRound, -- stage_1_opened_values + OpenedRound -- stage_2_opened_values + ) + } + + -- ========================================================================== + -- Byte-reading primitives. Every reader threads the remaining `ByteStream`. + -- ========================================================================== + + fn read_u8(stream: ByteStream) -> (U8, ByteStream) { + match load(stream) { + ListNode.Cons(byte, rest) => (byte, rest), + ListNode.Nil => (0u8, store(ListNode.Nil)), + } + } + + -- One raw `u64`: 8 little-endian bytes, kept as `U64` (no field reduction). + fn read_u64(stream: ByteStream) -> (U64, ByteStream) { + let (b0, s0) = read_u8(stream); + let (b1, s1) = read_u8(s0); + let (b2, s2) = read_u8(s1); + let (b3, s3) = read_u8(s2); + let (b4, s4) = read_u8(s3); + let (b5, s5) = read_u8(s4); + let (b6, s6) = read_u8(s5); + let (b7, s7) = read_u8(s6); + ([b0, b1, b2, b3, b4, b5, b6, b7], s7) + } + + -- A bincode length prefix: a fixed-width `u64`, flattened to a field count. + -- `flatten_u64` asserts the top byte is zero (< 2^56), always true for the + -- list lengths in a proof. + fn read_count(stream: ByteStream) -> (G, ByteStream) { + let (val, s) = read_u64(stream); + (flatten_u64(val), s) + } + + -- Interpret a raw little-endian `u64` limb as a Goldilocks field element. The + -- field add/mul reduce mod p, so a non-canonical wire repr (e.g. `0` shipped + -- as `p = 0xFFFFFFFF00000001`) maps to the canonical field element. + fn limb_to_field(b: U64) -> G { + to_field(b[0]) + + 0x100 * to_field(b[1]) + + 0x10000 * to_field(b[2]) + + 0x1000000 * to_field(b[3]) + + 0x100000000 * to_field(b[4]) + + 0x10000000000 * to_field(b[5]) + + 0x1000000000000 * to_field(b[6]) + + 0x100000000000000 * to_field(b[7]) + } + + -- `ExtVal` -> two `u64` limbs (no length prefix), each canonicalized into a + -- non-native Goldilocks element (`gl_reduce` maps a non-canonical wire repr, + -- e.g. `0` shipped as `p`, to its canonical `< p` bytes). + fn read_ext(stream: ByteStream) -> (Ext, ByteStream) { + let (a, s0) = read_u64(stream); + let (b, s1) = read_u64(s0); + ([gl_reduce(a), gl_reduce(b)], s1) + } + + + -- Merkle digest -> `[u64; 4]`, no length prefix. + fn read_digest(stream: ByteStream) -> (Digest, ByteStream) { + let (a, s0) = read_u64(stream); + let (b, s1) = read_u64(s0); + let (c, s2) = read_u64(s1); + let (d, s3) = read_u64(s2); + ([a, b, c, d], s3) + } + + -- ========================================================================== + -- Length-prefixed `Vec` readers. First-order, so one pair per element + -- type: `read_T_vec` reads the `u64` length then loops `read_T_vec_n`. + -- ========================================================================== + + fn read_u8_vec(stream: ByteStream) -> (List‹U8›, ByteStream) { + let (n, s) = read_count(stream); + read_u8_vec_n(s, n) + } + fn read_u8_vec_n(stream: ByteStream, n: G) -> (List‹U8›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_u8(stream); + let (rest, s2) = read_u8_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_u64_vec(stream: ByteStream) -> (List‹U64›, ByteStream) { + let (n, s) = read_count(stream); + read_u64_vec_n(s, n) + } + fn read_u64_vec_n(stream: ByteStream, n: G) -> (List‹U64›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_u64(stream); + let (rest, s2) = read_u64_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_ext_vec(stream: ByteStream) -> (List‹Ext›, ByteStream) { + let (n, s) = read_count(stream); + read_ext_vec_n(s, n) + } + fn read_ext_vec_n(stream: ByteStream, n: G) -> (List‹Ext›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_ext(stream); + let (rest, s2) = read_ext_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_digest_vec(stream: ByteStream) -> (List‹Digest›, ByteStream) { + let (n, s) = read_count(stream); + read_digest_vec_n(s, n) + } + fn read_digest_vec_n(stream: ByteStream, n: G) -> (List‹Digest›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_digest(stream); + let (rest, s2) = read_digest_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + -- `Vec>` for `BatchOpening.opened_values`. + fn read_u64_vec_vec(stream: ByteStream) -> (List‹List‹U64››, ByteStream) { + let (n, s) = read_count(stream); + read_u64_vec_vec_n(s, n) + } + fn read_u64_vec_vec_n(stream: ByteStream, n: G) -> (List‹List‹U64››, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_u64_vec(stream); + let (rest, s2) = read_u64_vec_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + -- `OpenedRound = Vec>>`, built bottom-up from `read_ext_vec`. + fn read_ext_vec_vec(stream: ByteStream) -> (List‹List‹Ext››, ByteStream) { + let (n, s) = read_count(stream); + read_ext_vec_vec_n(s, n) + } + fn read_ext_vec_vec_n(stream: ByteStream, n: G) -> (List‹List‹Ext››, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_ext_vec(stream); + let (rest, s2) = read_ext_vec_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_opened_round(stream: ByteStream) -> (OpenedRound, ByteStream) { + let (n, s) = read_count(stream); + read_opened_round_n(s, n) + } + fn read_opened_round_n(stream: ByteStream, n: G) -> (OpenedRound, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_ext_vec_vec(stream); + let (rest, s2) = read_opened_round_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + -- ========================================================================== + -- Struct readers (fields in declaration order, no framing). + -- ========================================================================== + + fn read_merkle_cap(stream: ByteStream) -> (MerkleCap, ByteStream) { + read_digest_vec(stream) + } + fn read_merkle_cap_vec(stream: ByteStream) -> (List‹MerkleCap›, ByteStream) { + let (n, s) = read_count(stream); + read_merkle_cap_vec_n(s, n) + } + fn read_merkle_cap_vec_n(stream: ByteStream, n: G) -> (List‹MerkleCap›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_merkle_cap(stream); + let (rest, s2) = read_merkle_cap_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_commitments(stream: ByteStream) -> (Commitments, ByteStream) { + let (stage_1_trace, s0) = read_merkle_cap(stream); + let (stage_2_trace, s1) = read_merkle_cap(s0); + let (quotient_chunks, s2) = read_merkle_cap(s1); + (Commitments.Mk(stage_1_trace, stage_2_trace, quotient_chunks), s2) + } + + fn read_batch_opening(stream: ByteStream) -> (BatchOpening, ByteStream) { + let (opened_values, s0) = read_u64_vec_vec(stream); + let (opening_proof, s1) = read_digest_vec(s0); + (BatchOpening.Mk(opened_values, opening_proof), s1) + } + fn read_batch_opening_vec(stream: ByteStream) -> (List‹BatchOpening›, ByteStream) { + let (n, s) = read_count(stream); + read_batch_opening_vec_n(s, n) + } + fn read_batch_opening_vec_n(stream: ByteStream, n: G) -> (List‹BatchOpening›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_batch_opening(stream); + let (rest, s2) = read_batch_opening_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_commit_phase_step(stream: ByteStream) -> (CommitPhaseProofStep, ByteStream) { + let (log_arity, s0) = read_u8(stream); + let (sibling_values, s1) = read_ext_vec(s0); + let (opening_proof, s2) = read_digest_vec(s1); + (CommitPhaseProofStep.Mk(log_arity, sibling_values, opening_proof), s2) + } + fn read_commit_phase_step_vec(stream: ByteStream) -> (List‹CommitPhaseProofStep›, ByteStream) { + let (n, s) = read_count(stream); + read_commit_phase_step_vec_n(s, n) + } + fn read_commit_phase_step_vec_n(stream: ByteStream, n: G) -> (List‹CommitPhaseProofStep›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_commit_phase_step(stream); + let (rest, s2) = read_commit_phase_step_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_query_proof(stream: ByteStream) -> (QueryProof, ByteStream) { + let (input_proof, s0) = read_batch_opening_vec(stream); + let (commit_phase_openings, s1) = read_commit_phase_step_vec(s0); + (QueryProof.Mk(input_proof, commit_phase_openings), s1) + } + fn read_query_proof_vec(stream: ByteStream) -> (List‹QueryProof›, ByteStream) { + let (n, s) = read_count(stream); + read_query_proof_vec_n(s, n) + } + fn read_query_proof_vec_n(stream: ByteStream, n: G) -> (List‹QueryProof›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_query_proof(stream); + let (rest, s2) = read_query_proof_vec_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_fri_proof(stream: ByteStream) -> (FriProof, ByteStream) { + let (commit_phase_commits, s0) = read_merkle_cap_vec(stream); + let (commit_pow_witnesses, s1) = read_u64_vec(s0); + let (query_proofs, s2) = read_query_proof_vec(s1); + let (final_poly, s3) = read_ext_vec(s2); + let (query_pow_witness, s4) = read_u64(s3); + (FriProof.Mk(commit_phase_commits, commit_pow_witnesses, query_proofs, + final_poly, query_pow_witness), s4) + } + + -- `Option`: 1 tag byte, then the value when `Some`. + fn read_preprocessed(stream: ByteStream) -> (PreprocessedOpt, ByteStream) { + let (tag, s) = read_u8(stream); + match tag { + 0 => (PreprocessedOpt.NoPreprocessed, s), + _ => + let (r, s2) = read_opened_round(s); + (PreprocessedOpt.SomePreprocessed(r), s2), + } + } + + -- Full `Proof` in declaration order (see `manual_codec.rs::Reader::proof`). + fn read_proof(stream: ByteStream) -> (Proof, ByteStream) { + let (commitments, s0) = read_commitments(stream); + let (intermediate_accumulators, s1) = read_ext_vec(s0); + let (log_degrees, s2) = read_u8_vec(s1); + let (opening_proof, s3) = read_fri_proof(s2); + let (quotient_opened_values, s4) = read_opened_round(s3); + let (preprocessed_opened_values, s5) = read_preprocessed(s4); + let (stage_1_opened_values, s6) = read_opened_round(s5); + let (stage_2_opened_values, s7) = read_opened_round(s6); + (Proof.Mk(commitments, intermediate_accumulators, log_degrees, opening_proof, + quotient_opened_values, preprocessed_opened_values, + stage_1_opened_values, stage_2_opened_values), s7) + } +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/Goldilocks.lean b/Ix/MultiStark/Goldilocks.lean new file mode 100644 index 00000000..6bfc8c3a --- /dev/null +++ b/Ix/MultiStark/Goldilocks.lean @@ -0,0 +1,318 @@ +module +public import Ix.Aiur.Meta + +/-! +# Non-native Goldilocks field arithmetic (`Goldilocks = [U8; 8]`) + +The recursive verifier computes on the *inner* proof's field (Goldilocks), +but Aiur compiles the verifier to its *own* (outer) field — which is **not +always Goldilocks** (it may be the BLS scalar field). So inner-field arithmetic +cannot use Aiur's native `+`/`*` (those compute in the outer field); it must be +**emulated** on bytes. + +A `Goldilocks` element is its canonical value `< p = 2⁶⁴ − 2³² + 1` held as 8 +little-endian bytes (`[U8; 8]`), and `ExtGoldilocks = [Goldilocks; 2]` is the +degree-2 extension `𝔽_p[X]/(X² − 7)`. All ops are built from the u8 gadgets +(`u8_add`/`u8_sub`/`u8_mul` give full carry/borrow/16-bit-product), so they are +field-agnostic and produce bytes directly (no field→byte decomposition needed). + +`EPSILON = 2³² − 1`, and `2⁶⁴ ≡ EPSILON (mod p)`, the basis of the reductions. + +Validated byte-for-byte against `multi-stark`'s native Goldilocks (`gl_ops_ref` +in `multi-stark/src/types.rs`). +-/ + +public section + +namespace MultiStark + +def goldilocks := ⟦ + type Goldilocks = [U8; 8] + type ExtGoldilocks = [Goldilocks; 2] + + -- p = 2⁶⁴ − 2³² + 1 and EPSILON = 2³² − 1, little-endian. + fn gl_p() -> Goldilocks { [1u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8] } + fn gl_eps() -> Goldilocks { [255u8, 255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 0u8] } + fn gl_zero() -> Goldilocks { [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] } + fn gl_one() -> Goldilocks { [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] } + + -- ========================================================================== + -- Byte-vector primitives (carry/borrow chains over the u8 gadgets). + -- ========================================================================== + + -- Add two bytes with a carry-in (0/1). Returns (sum byte, carry-out 0/1). + -- `u8_add` adds only two bytes; the carry-in is folded with a second add. At + -- most one of the two overflow flags fires, so the carry-out stays 0/1. + fn adc(a: U8, b: U8, cin: G) -> (U8, G) { + let (s1, o1) = u8_add(a, b); + let (s2, o2) = u8_add(s1, u8_from_field_unsafe(cin)); + (s2, to_field(o1) + to_field(o2)) + } + + -- Subtract two bytes with a borrow-in (0/1). Returns (diff byte, borrow-out). + fn sbb(a: U8, b: U8, bin: G) -> (U8, G) { + let (d1, w1) = u8_sub(a, b); + let (d2, w2) = u8_sub(d1, u8_from_field_unsafe(bin)); + (d2, to_field(w1) + to_field(w2)) + } + + -- 8-byte (64-bit) add. Returns the 8-byte sum and the final carry (0/1). + fn add8(a: Goldilocks, b: Goldilocks) -> (Goldilocks, G) { + let (r0, c0) = adc(a[0], b[0], 0); + let (r1, c1) = adc(a[1], b[1], c0); + let (r2, c2) = adc(a[2], b[2], c1); + let (r3, c3) = adc(a[3], b[3], c2); + let (r4, c4) = adc(a[4], b[4], c3); + let (r5, c5) = adc(a[5], b[5], c4); + let (r6, c6) = adc(a[6], b[6], c5); + let (r7, c7) = adc(a[7], b[7], c6); + ([r0, r1, r2, r3, r4, r5, r6, r7], c7) + } + + -- 8-byte (64-bit) subtract. Returns the 8-byte difference and final borrow. + fn sub8(a: Goldilocks, b: Goldilocks) -> (Goldilocks, G) { + let (r0, w0) = sbb(a[0], b[0], 0); + let (r1, w1) = sbb(a[1], b[1], w0); + let (r2, w2) = sbb(a[2], b[2], w1); + let (r3, w3) = sbb(a[3], b[3], w2); + let (r4, w4) = sbb(a[4], b[4], w3); + let (r5, w5) = sbb(a[5], b[5], w4); + let (r6, w6) = sbb(a[6], b[6], w5); + let (r7, w7) = sbb(a[7], b[7], w6); + ([r0, r1, r2, r3, r4, r5, r6, r7], w7) + } + + -- Branchless select on a 0/1 flag: `cond ? x : y` (per byte). `cond` is a + -- field element in {0,1}; each result byte equals x[i] or y[i] (both < 256), + -- so `u8_from_field_unsafe` is safe. + fn sel(cond: G, x: U8, y: U8) -> U8 { + u8_from_field_unsafe(to_field(x) * cond + to_field(y) * (1 - cond)) + } + fn select8(cond: G, x: Goldilocks, y: Goldilocks) -> Goldilocks { + [sel(cond, x[0], y[0]), sel(cond, x[1], y[1]), sel(cond, x[2], y[2]), + sel(cond, x[3], y[3]), sel(cond, x[4], y[4]), sel(cond, x[5], y[5]), + sel(cond, x[6], y[6]), sel(cond, x[7], y[7])] + } + + -- ========================================================================== + -- Base field add / sub (mod p). + -- ========================================================================== + + -- a + b mod p. The raw sum is `c·2⁶⁴ + s` with `c ∈ {0,1}`, value `< 2p`: + -- • c = 1 ⇒ s < p (since 2⁶⁴ > p), and `2⁶⁴ + s − p = s + EPSILON` + -- (a non-overflowing add since the true result is < p); + -- • c = 0 ⇒ reduce by one conditional subtraction of p (when s ≥ p). + fn gl_add(a: Goldilocks, b: Goldilocks) -> Goldilocks { + let (s, c) = add8(a, b); + let (s_minus_p, borrow) = sub8(s, gl_p()); -- borrow = 1 iff s < p + let (s_plus_eps, _) = add8(s, gl_eps()); + let c0 = select8(1 - borrow, s_minus_p, s); -- c=0 branch: (s≥p ? s−p : s) + select8(c, s_plus_eps, c0) + } + + -- a − b mod p. `sub8` borrow = 1 iff a < b, in which case the true result is + -- `(a − b mod 2⁶⁴) − EPSILON = a − b + p`. + fn gl_sub(a: Goldilocks, b: Goldilocks) -> Goldilocks { + let (d, borrow) = sub8(a, b); + let (d_minus_eps, _) = sub8(d, gl_eps()); + select8(borrow, d_minus_eps, d) + } + + fn gl_neg(a: Goldilocks) -> Goldilocks { gl_sub(gl_zero(), a) } + fn gl_seven() -> Goldilocks { [7u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] } + fn gl_two() -> Goldilocks { [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] } + + -- Canonicalize an arbitrary 8-byte value (possibly ≥ p, e.g. a raw hash limb + -- or a non-canonical wire encoding) to the canonical representative `< p`. + -- Any 8-byte value is `< 2⁶⁴ < 2p`, so one conditional subtraction of p + -- suffices (borrow = 1 iff x < p ⇒ already canonical). + fn gl_reduce(x: Goldilocks) -> Goldilocks { + let (xmp, borrow) = sub8(x, gl_p()); + select8(borrow, x, xmp) + } + + -- 1 iff `x` is the field zero (canonicalize first, then test all bytes zero). + -- The byte sum is `< 8·256 = 2048`, so `eq_zero` of the sum is field-agnostic. + fn gl_is_zero(x: Goldilocks) -> G { + let r = gl_reduce(x); + eq_zero(to_field(r[0]) + to_field(r[1]) + to_field(r[2]) + to_field(r[3]) + + to_field(r[4]) + to_field(r[5]) + to_field(r[6]) + to_field(r[7])) + } + + -- 1 iff two canonical Goldilocks values are equal (byte-exact). Inputs must be + -- canonical (`gl_*` outputs always are); each `to_field` difference is small. + fn gl_eq(a: Goldilocks, b: Goldilocks) -> G { + eq_zero(to_field(a[0]) - to_field(b[0])) * eq_zero(to_field(a[1]) - to_field(b[1])) + * eq_zero(to_field(a[2]) - to_field(b[2])) * eq_zero(to_field(a[3]) - to_field(b[3])) + * eq_zero(to_field(a[4]) - to_field(b[4])) * eq_zero(to_field(a[5]) - to_field(b[5])) + * eq_zero(to_field(a[6]) - to_field(b[6])) * eq_zero(to_field(a[7]) - to_field(b[7])) + } + + -- ========================================================================== + -- Base field multiply (mod p): schoolbook 64×64 → 128-bit product, then the + -- Goldilocks `reduce128` (ported from Plonky3 `goldilocks.rs`). + -- ========================================================================== + + -- 8-byte × 1-byte → 9-byte product (`a · m`, with `a < 2⁶⁴`, `m < 2⁸`, so the + -- result is `< 2⁷²`). Column `k` is `lo_k + hi_{k-1} + carry`, and the carry + -- provably stays in {0,1} (each column sum ≤ 255+255+1 = 511 < 512). + fn mul1(a: Goldilocks, m: U8) -> [U8; 9] { + let (l0, h0) = u8_mul(a[0], m); + let (l1, h1) = u8_mul(a[1], m); + let (l2, h2) = u8_mul(a[2], m); + let (l3, h3) = u8_mul(a[3], m); + let (l4, h4) = u8_mul(a[4], m); + let (l5, h5) = u8_mul(a[5], m); + let (l6, h6) = u8_mul(a[6], m); + let (l7, h7) = u8_mul(a[7], m); + let (r1, c1) = adc(l1, h0, 0); + let (r2, c2) = adc(l2, h1, c1); + let (r3, c3) = adc(l3, h2, c2); + let (r4, c4) = adc(l4, h3, c3); + let (r5, c5) = adc(l5, h4, c4); + let (r6, c6) = adc(l6, h5, c5); + let (r7, c7) = adc(l7, h6, c6); + let (r8, _) = adc(h7, 0u8, c7); + [l0, r1, r2, r3, r4, r5, r6, r7, r8] + } + + -- 16-byte (128-bit) add, carry chain. The final carry is discarded (used only + -- to accumulate partial-product rows whose total provably fits in 128 bits). + fn add16(x: [U8; 16], y: [U8; 16]) -> [U8; 16] { + let (r0, c0) = adc(x[0], y[0], 0); + let (r1, c1) = adc(x[1], y[1], c0); + let (r2, c2) = adc(x[2], y[2], c1); + let (r3, c3) = adc(x[3], y[3], c2); + let (r4, c4) = adc(x[4], y[4], c3); + let (r5, c5) = adc(x[5], y[5], c4); + let (r6, c6) = adc(x[6], y[6], c5); + let (r7, c7) = adc(x[7], y[7], c6); + let (r8, c8) = adc(x[8], y[8], c7); + let (r9, c9) = adc(x[9], y[9], c8); + let (r10, c10) = adc(x[10], y[10], c9); + let (r11, c11) = adc(x[11], y[11], c10); + let (r12, c12) = adc(x[12], y[12], c11); + let (r13, c13) = adc(x[13], y[13], c12); + let (r14, c14) = adc(x[14], y[14], c13); + let (r15, _) = adc(x[15], y[15], c14); + [r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15] + } + + -- Full 64×64 → 128-bit product as 16 little-endian bytes. Eight single-byte + -- rows `a · b[i]`, each shifted left by `i` bytes and accumulated. + fn mul128(a: Goldilocks, b: Goldilocks) -> [U8; 16] { + let m0 = mul1(a, b[0]); + let m1 = mul1(a, b[1]); + let m2 = mul1(a, b[2]); + let m3 = mul1(a, b[3]); + let m4 = mul1(a, b[4]); + let m5 = mul1(a, b[5]); + let m6 = mul1(a, b[6]); + let m7 = mul1(a, b[7]); + let acc = [m0[0], m0[1], m0[2], m0[3], m0[4], m0[5], m0[6], m0[7], m0[8], + 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + let rb1 = [0u8, m1[0], m1[1], m1[2], m1[3], m1[4], m1[5], m1[6], m1[7], m1[8], + 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + let acc = add16(acc, rb1); + let rb2 = [0u8, 0u8, m2[0], m2[1], m2[2], m2[3], m2[4], m2[5], m2[6], m2[7], m2[8], + 0u8, 0u8, 0u8, 0u8, 0u8]; + let acc = add16(acc, rb2); + let rb3 = [0u8, 0u8, 0u8, m3[0], m3[1], m3[2], m3[3], m3[4], m3[5], m3[6], m3[7], m3[8], + 0u8, 0u8, 0u8, 0u8]; + let acc = add16(acc, rb3); + let rb4 = [0u8, 0u8, 0u8, 0u8, m4[0], m4[1], m4[2], m4[3], m4[4], m4[5], m4[6], m4[7], m4[8], + 0u8, 0u8, 0u8]; + let acc = add16(acc, rb4); + let rb5 = [0u8, 0u8, 0u8, 0u8, 0u8, m5[0], m5[1], m5[2], m5[3], m5[4], m5[5], m5[6], m5[7], m5[8], + 0u8, 0u8]; + let acc = add16(acc, rb5); + let rb6 = [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, m6[0], m6[1], m6[2], m6[3], m6[4], m6[5], m6[6], m6[7], m6[8], + 0u8]; + let acc = add16(acc, rb6); + let rb7 = [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, m7[0], m7[1], m7[2], m7[3], m7[4], m7[5], m7[6], m7[7], m7[8]]; + add16(acc, rb7) + } + + -- Reduce a 128-bit value (16 LE bytes) mod p. Port of Plonky3 `reduce128`: + -- with `x = x_lo + x_hi·2⁶⁴`, split `x_hi = x_hi_lo + x_hi_hi·2³²`; then + -- `2⁶⁴ ≡ EPSILON` and `2⁹⁶ ≡ −1` give `x ≡ x_lo − x_hi_hi + x_hi_lo·EPSILON`. + -- A final conditional subtraction of p canonicalizes (Plonky3 keeps a + -- non-canonical u64; we need the canonical bytes). + fn reduce128(p: [U8; 16]) -> Goldilocks { + let xlo = [p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]]; + let xhl = [p[8], p[9], p[10], p[11], 0u8, 0u8, 0u8, 0u8]; -- low 32 bits of x_hi + let xhh = [p[12], p[13], p[14], p[15], 0u8, 0u8, 0u8, 0u8]; -- high 32 bits of x_hi + -- t0 = x_lo − x_hi_hi (subtract EPSILON on borrow; cannot underflow). + let (t0w, borrow) = sub8(xlo, xhh); + let (t0b, _) = sub8(t0w, gl_eps()); + let t0 = select8(borrow, t0b, t0w); + -- t1 = x_hi_lo · EPSILON (< 2⁶⁴, so only the low 8 product bytes matter). + let t1full = mul128(xhl, gl_eps()); + let t1 = [t1full[0], t1full[1], t1full[2], t1full[3], + t1full[4], t1full[5], t1full[6], t1full[7]]; + -- t2 = t0 + t1 (add EPSILON on overflow; result stays < 2⁶⁴). + let (t2w, carry) = add8(t0, t1); + let (t2c, _) = add8(t2w, gl_eps()); + let t2 = select8(carry, t2c, t2w); + -- canonicalize: borrow2 = 1 iff t2 < p ⇒ keep t2, else subtract p. + let (t2mp, borrow2) = sub8(t2, gl_p()); + select8(borrow2, t2, t2mp) + } + + fn gl_mul(a: Goldilocks, b: Goldilocks) -> Goldilocks { reduce128(mul128(a, b)) } + fn gl_sq(a: Goldilocks) -> Goldilocks { gl_mul(a, a) } + + -- ========================================================================== + -- Base field inverse / divide via Fermat: a⁻¹ = a^(p−2). + -- p − 2 = 2⁶⁴ − 2³² − 1 — in binary: bit 32 is 0, every other bit is 1 + -- (31 ones, one zero, 32 ones). `gl_run` applies `n` steps of `acc ← acc²·b`. + -- ========================================================================== + fn gl_run(acc: Goldilocks, base: Goldilocks, n: G) -> Goldilocks { + match n { + 0 => acc, + _ => gl_run(gl_mul(gl_sq(acc), base), base, n - 1), + } + } + fn gl_inverse(x: Goldilocks) -> Goldilocks { + let acc = gl_run(x, x, 30); -- bits 63..33: 31 ones (initial acc = x is bit 63) + let acc = gl_sq(acc); -- bit 32: a single 0 + gl_run(acc, x, 32) -- bits 31..0: 32 ones + } + fn gl_div(a: Goldilocks, b: Goldilocks) -> Goldilocks { gl_mul(a, gl_inverse(b)) } + + -- ========================================================================== + -- Extension field ExtGoldilocks = 𝔽_p[X]/(X² − 7). + -- ========================================================================== + fn eg_add(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks { + [gl_add(a[0], b[0]), gl_add(a[1], b[1])] + } + fn eg_sub(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks { + [gl_sub(a[0], b[0]), gl_sub(a[1], b[1])] + } + fn eg_neg(a: ExtGoldilocks) -> ExtGoldilocks { + [gl_neg(a[0]), gl_neg(a[1])] + } + -- (a0 + a1·X)(b0 + b1·X) = (a0·b0 + 7·a1·b1) + (a0·b1 + a1·b0)·X. + fn eg_mul(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks { + [gl_add(gl_mul(a[0], b[0]), gl_mul(gl_seven(), gl_mul(a[1], b[1]))), + gl_add(gl_mul(a[0], b[1]), gl_mul(a[1], b[0]))] + } + -- conjugate ā = a0 − a1·X, norm a·ā = a0² − 7·a1² ∈ 𝔽_p, a⁻¹ = ā / norm. + fn eg_inverse(a: ExtGoldilocks) -> ExtGoldilocks { + let norm = gl_sub(gl_sq(a[0]), gl_mul(gl_seven(), gl_sq(a[1]))); + let ninv = gl_inverse(norm); + [gl_mul(a[0], ninv), gl_mul(gl_neg(a[1]), ninv)] + } + fn eg_div(a: ExtGoldilocks, b: ExtGoldilocks) -> ExtGoldilocks { + eg_mul(a, eg_inverse(b)) + } + -- 1 iff two extension elements are equal (both coordinates byte-exact). + fn eg_eq(a: ExtGoldilocks, b: ExtGoldilocks) -> G { + gl_eq(a[0], b[0]) * gl_eq(a[1], b[1]) + } + +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/Keccak.lean b/Ix/MultiStark/Keccak.lean new file mode 100644 index 00000000..a2eb7e73 --- /dev/null +++ b/Ix/MultiStark/Keccak.lean @@ -0,0 +1,355 @@ +module +public import Ix.Aiur.Meta +public import Ix.IxVM.Core +public import Ix.IxVM.ByteStream + +/-! +# Keccak-256 in Aiur + +The hash multi-stark uses (`p3_keccak`: `Keccak256Hash` + `KeccakF` permutation, +keccak-f[1600] sponge). This is the Ethereum/original Keccak variant +(`tiny_keccak::Keccak::v256`): pad `10*1` with first pad byte `0x01` and final +`0x80`, rate = 136 bytes (17 lanes), capacity 512, 256-bit output. + +Representation: +* lane = 64-bit word = `[U8; 8]`, little-endian bytes (byte 0 = least significant). +* state = `[Lane; 25]`, lane index `L(x,y) = x + 5*y`. + +Bit ops use the `u8` gadgets (`u8_xor`/`u8_and`; NOT = `u8_xor(b, 0xFF)`). +Rotations go through `u8_bit_decomposition` (u8 → `[field; 8]`) and a generic +bit-list rotate, recomposing bytes with `u8_from_field_unsafe`. + +**Scope:** single-block only — message length ≤ 135 bytes (one keccak-f call). +Multi-block absorption is future work. +-/ + +public section + +namespace MultiStark + +def keccak := ⟦ + -- A 64-bit Keccak lane: a pointer to 8 little-endian bytes. Storing lanes + -- behind a pointer keeps the state (and every lane-passing function) one + -- column wide instead of eight. + type Lane = &[U8; 8] + + -- The keccak-f state: a pointer to the 5×5 = 25 lanes. Passed behind a + -- pointer too, so state-threading functions stay one column wide. + type State = &[Lane; 25] + + -- ========================================================================== + -- Lane bit-logic (byte-wise u8 gadgets). + -- ========================================================================== + + fn xor8(a: Lane, b: Lane) -> Lane { + let x = load(a); + let y = load(b); + store([u8_xor(x[0], y[0]), u8_xor(x[1], y[1]), u8_xor(x[2], y[2]), u8_xor(x[3], y[3]), + u8_xor(x[4], y[4]), u8_xor(x[5], y[5]), u8_xor(x[6], y[6]), u8_xor(x[7], y[7])]) + } + + fn and8(a: Lane, b: Lane) -> Lane { + let x = load(a); + let y = load(b); + store([u8_and(x[0], y[0]), u8_and(x[1], y[1]), u8_and(x[2], y[2]), u8_and(x[3], y[3]), + u8_and(x[4], y[4]), u8_and(x[5], y[5]), u8_and(x[6], y[6]), u8_and(x[7], y[7])]) + } + + -- Bitwise NOT via XOR with 0xFF (keeps the byte `u8`-typed). + fn not8(a: Lane) -> Lane { + let x = load(a); + store([u8_xor(x[0], 255u8), u8_xor(x[1], 255u8), u8_xor(x[2], 255u8), u8_xor(x[3], 255u8), + u8_xor(x[4], 255u8), u8_xor(x[5], 255u8), u8_xor(x[6], 255u8), u8_xor(x[7], 255u8)]) + } + + -- ========================================================================== + -- Generic cyclic left-rotation of a lane by `n` bits (0 ≤ n < 64). + -- Decompose to a 64-element bit list (LSB first), reindex, recompose. + -- ========================================================================== + + -- Prepend the 8 bits of a byte decomposition (LSB first) onto `tail`. + fn cons8(d: [G; 8], tail: List‹G›) -> List‹G› { + store(ListNode.Cons(d[0], store(ListNode.Cons(d[1], store(ListNode.Cons(d[2], + store(ListNode.Cons(d[3], store(ListNode.Cons(d[4], store(ListNode.Cons(d[5], + store(ListNode.Cons(d[6], store(ListNode.Cons(d[7], tail)))))))))))))))) + } + + -- Lane → 64 bits, index z = 8*byte + bit, LSB first. + fn lane_bits(l: Lane) -> List‹G› { + let v = load(l); + cons8(u8_bit_decomposition(v[0]), + cons8(u8_bit_decomposition(v[1]), + cons8(u8_bit_decomposition(v[2]), + cons8(u8_bit_decomposition(v[3]), + cons8(u8_bit_decomposition(v[4]), + cons8(u8_bit_decomposition(v[5]), + cons8(u8_bit_decomposition(v[6]), + cons8(u8_bit_decomposition(v[7]), store(ListNode.Nil))))))))) + } + + -- x mod 64 for x in [0, 127]: drop the 64-bit (bit 6). + fn mod64(x: G) -> G { + let bits = u8_bit_decomposition(u8_from_field_unsafe(x)); + x - 64 * bits[6] + } + + -- Recompose a byte (as `u8`) from bits[off .. off+7] (LSB first). Indexed + -- access is inlined as `list_drop` (not `list_lookup`): `list_drop` recurses + -- on the same list pointer, so the consecutive drops here share the cached + -- drop-chain (far better caching than `list_lookup`). + fn byte_from_bits(bs: List‹G›, off: G) -> U8 { + -- One `list_drop` to the byte's first bit; the remaining 7 bits are the + -- successive tails (a deref each), so no further drops are needed. + let &ListNode.Cons(b0, r0) = list_drop(bs, off); + let &ListNode.Cons(b1, r1) = r0; + let &ListNode.Cons(b2, r2) = r1; + let &ListNode.Cons(b3, r3) = r2; + let &ListNode.Cons(b4, r4) = r3; + let &ListNode.Cons(b5, r5) = r4; + let &ListNode.Cons(b6, r6) = r5; + let &ListNode.Cons(b7, _) = r6; + u8_from_field_unsafe( + b0 + 2 * b1 + 4 * b2 + 8 * b3 + 16 * b4 + 32 * b5 + 64 * b6 + 128 * b7) + } + + fn lane_from_bits(bs: List‹G›) -> Lane { + store([byte_from_bits(bs, 0), byte_from_bits(bs, 8), byte_from_bits(bs, 16), byte_from_bits(bs, 24), + byte_from_bits(bs, 32), byte_from_bits(bs, 40), byte_from_bits(bs, 48), byte_from_bits(bs, 56)]) + } + + -- Build the rotated bit list: output position p (0..63) takes source + -- bit (p - n) mod 64 = mod64(p + 64 - n). + fn build_rot(bs: List‹G›, n: G, p: G) -> List‹G› { + match 64 - p { + 0 => store(ListNode.Nil), + _ => + let src = mod64(p + 64 - n); + let &ListNode.Cons(head, _) = list_drop(bs, src); + store(ListNode.Cons(head, build_rot(bs, n, p + 1))), + } + } + + fn rotl(l: Lane, n: G) -> Lane { + lane_from_bits(build_rot(lane_bits(l), n, 0)) + } + + -- ========================================================================== + -- keccak-f[1600]: 24 rounds of θ ρ π χ ι. + -- ========================================================================== + + fn keccak_round(sp: State, rc: Lane) -> State { + let s = load(sp); + -- θ: column parities and the D correction. + let c0 = xor8(xor8(xor8(xor8(s[0], s[5]), s[10]), s[15]), s[20]); + let c1 = xor8(xor8(xor8(xor8(s[1], s[6]), s[11]), s[16]), s[21]); + let c2 = xor8(xor8(xor8(xor8(s[2], s[7]), s[12]), s[17]), s[22]); + let c3 = xor8(xor8(xor8(xor8(s[3], s[8]), s[13]), s[18]), s[23]); + let c4 = xor8(xor8(xor8(xor8(s[4], s[9]), s[14]), s[19]), s[24]); + let d0 = xor8(c4, rotl(c1, 1)); + let d1 = xor8(c0, rotl(c2, 1)); + let d2 = xor8(c1, rotl(c3, 1)); + let d3 = xor8(c2, rotl(c4, 1)); + let d4 = xor8(c3, rotl(c0, 1)); + let a0 = xor8(s[0], d0); let a1 = xor8(s[1], d1); let a2 = xor8(s[2], d2); + let a3 = xor8(s[3], d3); let a4 = xor8(s[4], d4); + let a5 = xor8(s[5], d0); let a6 = xor8(s[6], d1); let a7 = xor8(s[7], d2); + let a8 = xor8(s[8], d3); let a9 = xor8(s[9], d4); + let a10 = xor8(s[10], d0); let a11 = xor8(s[11], d1); let a12 = xor8(s[12], d2); + let a13 = xor8(s[13], d3); let a14 = xor8(s[14], d4); + let a15 = xor8(s[15], d0); let a16 = xor8(s[16], d1); let a17 = xor8(s[17], d2); + let a18 = xor8(s[18], d3); let a19 = xor8(s[19], d4); + let a20 = xor8(s[20], d0); let a21 = xor8(s[21], d1); let a22 = xor8(s[22], d2); + let a23 = xor8(s[23], d3); let a24 = xor8(s[24], d4); + -- ρ + π: B[dest] = rotl(A'[src], offset). + let b0 = a0; + let b1 = rotl(a6, 44); + let b2 = rotl(a12, 43); + let b3 = rotl(a18, 21); + let b4 = rotl(a24, 14); + let b5 = rotl(a3, 28); + let b6 = rotl(a9, 20); + let b7 = rotl(a10, 3); + let b8 = rotl(a16, 45); + let b9 = rotl(a22, 61); + let b10 = rotl(a1, 1); + let b11 = rotl(a7, 6); + let b12 = rotl(a13, 25); + let b13 = rotl(a19, 8); + let b14 = rotl(a20, 18); + let b15 = rotl(a4, 27); + let b16 = rotl(a5, 36); + let b17 = rotl(a11, 10); + let b18 = rotl(a17, 15); + let b19 = rotl(a23, 56); + let b20 = rotl(a2, 62); + let b21 = rotl(a8, 55); + let b22 = rotl(a14, 39); + let b23 = rotl(a15, 41); + let b24 = rotl(a21, 2); + -- χ: A''[x][y] = B[x][y] ^ (¬B[x+1][y] & B[x+2][y]). + let e0 = xor8(b0, and8(not8(b1), b2)); + let e1 = xor8(b1, and8(not8(b2), b3)); + let e2 = xor8(b2, and8(not8(b3), b4)); + let e3 = xor8(b3, and8(not8(b4), b0)); + let e4 = xor8(b4, and8(not8(b0), b1)); + let e5 = xor8(b5, and8(not8(b6), b7)); + let e6 = xor8(b6, and8(not8(b7), b8)); + let e7 = xor8(b7, and8(not8(b8), b9)); + let e8 = xor8(b8, and8(not8(b9), b5)); + let e9 = xor8(b9, and8(not8(b5), b6)); + let e10 = xor8(b10, and8(not8(b11), b12)); + let e11 = xor8(b11, and8(not8(b12), b13)); + let e12 = xor8(b12, and8(not8(b13), b14)); + let e13 = xor8(b13, and8(not8(b14), b10)); + let e14 = xor8(b14, and8(not8(b10), b11)); + let e15 = xor8(b15, and8(not8(b16), b17)); + let e16 = xor8(b16, and8(not8(b17), b18)); + let e17 = xor8(b17, and8(not8(b18), b19)); + let e18 = xor8(b18, and8(not8(b19), b15)); + let e19 = xor8(b19, and8(not8(b15), b16)); + let e20 = xor8(b20, and8(not8(b21), b22)); + let e21 = xor8(b21, and8(not8(b22), b23)); + let e22 = xor8(b22, and8(not8(b23), b24)); + let e23 = xor8(b23, and8(not8(b24), b20)); + let e24 = xor8(b24, and8(not8(b20), b21)); + -- ι: add round constant to lane (0,0). + let f0 = xor8(e0, rc); + store([f0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, + e15, e16, e17, e18, e19, e20, e21, e22, e23, e24]) + } + + -- Round constant for round `i` (little-endian byte lane). A match (not an + -- array `get`, which needs a literal index) so it can be indexed at runtime. + fn rc_lane(i: G) -> Lane { + match i { + 0 => store([0x01u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 1 => store([0x82u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 2 => store([0x8au8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 3 => store([0x00u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]), + 4 => store([0x8bu8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 5 => store([0x01u8, 0u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8 ]), + 6 => store([0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]), + 7 => store([0x09u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 8 => store([0x8au8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 9 => store([0x88u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 10 => store([0x09u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8 ]), + 11 => store([0x0au8, 0u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8 ]), + 12 => store([0x8bu8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8 ]), + 13 => store([0x8bu8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 14 => store([0x89u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 15 => store([0x03u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 16 => store([0x02u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 17 => store([0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 18 => store([0x0au8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8 ]), + 19 => store([0x0au8, 0u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]), + 20 => store([0x81u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]), + 21 => store([0x80u8, 0x80u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0x80u8]), + 22 => store([0x01u8, 0u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0u8 ]), + _ => store([0x08u8, 0x80u8, 0u8, 0x80u8, 0u8, 0u8, 0u8, 0x80u8]), + } + } + + -- Apply the last `n` keccak-f rounds recursively (round index 24 - n), so + -- each round is its own call frame rather than 24 inlined copies (`fold`), + -- keeping the circuit narrow. `keccak_f_fold(s, 24)` is the full permutation. + fn keccak_f_fold(s: State, n: G) -> State { + match n { + 0 => s, + _ => keccak_f_fold(keccak_round(s, rc_lane(24 - n)), n - 1), + } + } + + -- ========================================================================== + -- Single-block sponge (keccak-256, message ≤ 135 bytes). + -- ========================================================================== + + -- Pad bytes for positions i..135 once the message is exhausted. `first`=1 at + -- position `len` (gets the 0x01 pad bit); position 135 gets the 0x80 bit + -- (combining to 0x81 when len == 135). + -- `first` is 1 on the first pad position (gets 0x01), 0 afterwards. The 0x80 + -- bit lands on position 135 (combining to 0x81 when first==1 there). Uses + -- `eq_zero` (an op) instead of a match so it stays tail-position-compilable. + fn build_pad(i: G, first: G) -> ByteStream { + match 136 - i { + 0 => store(ListNode.Nil), + _ => + store(ListNode.Cons(u8_from_field_unsafe(first + 128 * eq_zero(135 - i)), + build_pad(i + 1, 0))), + } + } + + -- Take one 136-byte rate block from `stream` starting at position `i`. + -- Returns (block, rest, full): a length-136 byte list, the unconsumed stream, + -- and full=1 if 136 message bytes were consumed (no padding yet), else 0 (the + -- message ended in this block, so pad10*1 was applied). When the stream is + -- already empty this yields an all-padding block with full=0 — the extra + -- block keccak appends when the message length is a multiple of 136. + fn read_block(stream: ByteStream, i: G) -> (ByteStream, ByteStream, G) { + match 136 - i { + 0 => (store(ListNode.Nil), stream, 1), + _ => match load(stream) { + ListNode.Cons(b, rest) => + let (blk, r, f) = read_block(rest, i + 1); + (store(ListNode.Cons(b, blk)), r, f), + ListNode.Nil => (build_pad(i, 1), store(ListNode.Nil), 0), + }, + } + } + + fn rate_lane(rate: ByteStream, b: G) -> Lane { + -- One `list_drop` to the lane's first byte; the next 7 are successive tails. + let &ListNode.Cons(r0, t0) = list_drop(rate, b); + let &ListNode.Cons(r1, t1) = t0; + let &ListNode.Cons(r2, t2) = t1; + let &ListNode.Cons(r3, t3) = t2; + let &ListNode.Cons(r4, t4) = t3; + let &ListNode.Cons(r5, t5) = t4; + let &ListNode.Cons(r6, t6) = t5; + let &ListNode.Cons(r7, _) = t6; + store([r0, r1, r2, r3, r4, r5, r6, r7]) + } + + -- XOR one 136-byte rate block into the first 17 lanes, then permute. + fn absorb_one(sp: State, rate: ByteStream) -> State { + let state = load(sp); + let s = store([ + xor8(state[0], rate_lane(rate, 0)), xor8(state[1], rate_lane(rate, 8)), + xor8(state[2], rate_lane(rate, 16)), xor8(state[3], rate_lane(rate, 24)), + xor8(state[4], rate_lane(rate, 32)), xor8(state[5], rate_lane(rate, 40)), + xor8(state[6], rate_lane(rate, 48)), xor8(state[7], rate_lane(rate, 56)), + xor8(state[8], rate_lane(rate, 64)), xor8(state[9], rate_lane(rate, 72)), + xor8(state[10], rate_lane(rate, 80)), xor8(state[11], rate_lane(rate, 88)), + xor8(state[12], rate_lane(rate, 96)), xor8(state[13], rate_lane(rate, 104)), + xor8(state[14], rate_lane(rate, 112)), xor8(state[15], rate_lane(rate, 120)), + xor8(state[16], rate_lane(rate, 128)), + state[17], state[18], state[19], state[20], state[21], state[22], state[23], state[24] + ]); + keccak_f_fold(s, 24) + } + + -- Absorb every rate block of the (padded) message into the state. + fn absorb_blocks(stream: ByteStream, state: State) -> State { + let (block, rest, full) = read_block(stream, 0); + let st2 = absorb_one(state, block); + match full { + 0 => st2, + _ => absorb_blocks(rest, st2), + } + } + + -- keccak-256: absorb the byte stream into the zero state, then squeeze the + -- first 32 bytes (lanes 0..3). Lanes are `load`ed back to inline bytes so the + -- digest is a plain `[[U8; 8]; 4]`. + fn keccak256(bytes: ByteStream) -> [[U8; 8]; 4] { + let z = store([0u8; 8]); + let init = store([z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z]); + let sp = absorb_blocks(bytes, init); + let s = load(sp); + [load(s[0]), load(s[1]), load(s[2]), load(s[3])] + } + +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/Pcs.lean b/Ix/MultiStark/Pcs.lean new file mode 100644 index 00000000..024adb87 --- /dev/null +++ b/Ix/MultiStark/Pcs.lean @@ -0,0 +1,772 @@ +module +public import Ix.Aiur.Meta +public import Ix.MultiStark.Deserialize +public import Ix.MultiStark.Keccak + +/-! +# PCS (FRI) verification + +`multi-stark/src/verifier.rs` calls `pcs.verify(coms_to_verify, opening_proof, +&mut challenger)` — a `TwoAdicFriPcs` FRI verification: query openings, Merkle +authentication paths, FRI folding consistency. This is the heaviest part of the +verifier and is being ported here in stages. + +## Stage 1 — Merkle (MMCS) hash primitives (this file, done + validated) + +The input/commit-phase commitments are `MerkleTreeMmcs` over the keccak hasher +configured in `multi-stark/src/types.rs`: + +* leaf hash : `SerializingHasher>` + — serialize each `Goldilocks` element to its canonical `u64`, then run a + padding-free **overwrite-mode** sponge (rate 17 lanes, capacity 8, output 4) + built on the keccak-f[1600] permutation (reused from `Keccak.lean`). +* compression: `CompressionFunctionFromHasher, 2, 4>` + — hash the 8 lanes of two concatenated digests into a 4-lane digest. + +`PaddingFreeSponge` differs from keccak-256 (`Keccak.lean`): no `10*1` padding, +rate 17 *lanes* (not 136 bytes), and each input block **overwrites** the rate +region (keccak-256 XORs). A full block triggers a permute; a final partial block +permutes once iff it absorbed ≥1 element; an empty trailing block does not +permute. + +Validated against `multi-stark`'s own hasher (see `pcs_hash_test`, reference +values from the `pcs_ref_values` test in `multi-stark/src/types.rs`). + +## Stage 2+ — TODO + +Merkle `verify_batch` (binary tree, multi-height injection), challenger +threading, the FRI fold chain (`open_input` / `verify_query`), and the final +polynomial check. Until those land, `pcs_verify` remains an accept-stub. +-/ + +public section + +namespace MultiStark + +def pcs := ⟦ + -- ========================================================================== + -- PaddingFreeSponge in overwrite mode. + -- + -- Lanes are the keccak `Lane` type (`&[U8; 8]`, LE). A `U64` opened value is + -- (for an honest, canonical proof) exactly the 8 LE bytes of a lane, so a lane + -- is just `store(u64)`. The 25-lane keccak `State` and `keccak_f_fold` are + -- reused from `Keccak.lean`. + -- ========================================================================== + + -- Take one input element as the next rate lane, or keep the existing state + -- lane `dflt` when the input is exhausted. The flag is 1 iff an input element + -- was consumed (input is consumed front-to-back, so the per-block flags are a + -- run of 1s followed by 0s). + fn u64_pick(vals: List‹U64›, dflt: Lane) -> (Lane, List‹U64›, G) { + match load(vals) { + ListNode.Nil => (dflt, vals, 0), + ListNode.Cons(x, rest) => (store(x), rest, 1), + } + } + + -- Absorb the input lanes into the state, 17 (= RATE) at a time, overwriting + -- the rate region. After a FULL block of 17, permute and continue. A final + -- partial block (1..16 elements) permutes once; an exactly-empty trailing + -- block does not permute (matches `PaddingFreeSponge::hash_iter`). + fn pf_absorb(sp: State, vals: List‹U64›) -> State { + let old = load(sp); + let (l0, v1, f0) = u64_pick(vals, old[0]); + let (l1, v2, _f1) = u64_pick(v1, old[1]); + let (l2, v3, _f2) = u64_pick(v2, old[2]); + let (l3, v4, _f3) = u64_pick(v3, old[3]); + let (l4, v5, _f4) = u64_pick(v4, old[4]); + let (l5, v6, _f5) = u64_pick(v5, old[5]); + let (l6, v7, _f6) = u64_pick(v6, old[6]); + let (l7, v8, _f7) = u64_pick(v7, old[7]); + let (l8, v9, _f8) = u64_pick(v8, old[8]); + let (l9, v10, _f9) = u64_pick(v9, old[9]); + let (l10, v11, _fa) = u64_pick(v10, old[10]); + let (l11, v12, _fb) = u64_pick(v11, old[11]); + let (l12, v13, _fc) = u64_pick(v12, old[12]); + let (l13, v14, _fd) = u64_pick(v13, old[13]); + let (l14, v15, _fe) = u64_pick(v14, old[14]); + let (l15, v16, _ff) = u64_pick(v15, old[15]); + let (l16, v17, f16) = u64_pick(v16, old[16]); + let sp2 = store([l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, + l14, l15, l16, old[17], old[18], old[19], old[20], old[21], + old[22], old[23], old[24]]); + match f16 { + 1 => pf_absorb(keccak_f_fold(sp2, 24), v17), + _ => match f0 { + 0 => sp2, + _ => keccak_f_fold(sp2, 24), + }, + } + } + + -- Hash a list of `u64` lanes (a serialized field-element row, or two + -- concatenated digests) into a 4-lane `Digest`. + fn pf_sponge_u64(vals: List‹U64›) -> Digest { + let z = store([0u8; 8]); + let init = store([z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z]); + let sp = pf_absorb(init, vals); + let s = load(sp); + [load(s[0]), load(s[1]), load(s[2]), load(s[3])] + } + + -- The MMCS leaf hash of a single matrix row (`SerializingHasher` over the row + -- of canonical `u64`s). For a leaf joining several same-height matrices, the + -- rows are concatenated first (see `verify_batch`, future work). + fn mmcs_hash_row(row: List‹U64›) -> Digest { + pf_sponge_u64(row) + } + + -- The MMCS 2-to-1 compression: hash the 8 lanes of two concatenated digests. + fn mmcs_compress(a: Digest, b: Digest) -> Digest { + let t0 = store(ListNode.Nil); + let t1 = store(ListNode.Cons(b[3], t0)); + let t2 = store(ListNode.Cons(b[2], t1)); + let t3 = store(ListNode.Cons(b[1], t2)); + let t4 = store(ListNode.Cons(b[0], t3)); + let t5 = store(ListNode.Cons(a[3], t4)); + let t6 = store(ListNode.Cons(a[2], t5)); + let t7 = store(ListNode.Cons(a[1], t6)); + let t8 = store(ListNode.Cons(a[0], t7)); + pf_sponge_u64(t8) + } + + -- ========================================================================== + -- Merkle MMCS `verify_batch` (binary tree, DIGEST_ELEMS = 4). + -- + -- Ports `multi-stark/Plonky3/merkle-tree/src/mmcs.rs::verify_batch` for the + -- binary (N = 2) case. All committed matrices have power-of-two heights, so a + -- matrix's height is `2^log_height`. The opened rows arrive in matrix order; + -- `lhs` is the matching list of per-matrix log-heights. The query `index` is + -- threaded as a bit list (LSB first = leaf→root path) to avoid field division. + -- + -- The leaf hash joins all matrices at the maximum log-height. Walking down, + -- each level folds with one proof sibling (ordered by the path bit), then — + -- if any matrix lives at the new log-height — injects that matrix group's leaf + -- hash via a second compression (this consumes no proof sibling), exactly as + -- the Rust loop's `next_height_openings_digest` injection. + -- ========================================================================== + + -- 1 iff two digests are equal (compared as field elements; hash outputs are + -- canonical so this is exact). + fn digest_eq(a: Digest, b: Digest) -> G { + eq_zero(limb_to_field(a[0]) - limb_to_field(b[0])) * + eq_zero(limb_to_field(a[1]) - limb_to_field(b[1])) * + eq_zero(limb_to_field(a[2]) - limb_to_field(b[2])) * + eq_zero(limb_to_field(a[3]) - limb_to_field(b[3])) + } + + -- Compress (current, sibling) in path order: path bit 0 ⇒ current is the left + -- child, bit 1 ⇒ current is the right child. + fn compress_ordered(bit: G, d: Digest, s: Digest) -> Digest { + match bit { + 0 => mmcs_compress(d, s), + _ => mmcs_compress(s, d), + } + } + + -- 1 iff some matrix has log-height `target`. + fn has_height(lhs: List‹G›, target: G) -> G { + match load(lhs) { + ListNode.Nil => 0, + ListNode.Cons(h, rest) => + match eq_zero(h - target) { + 1 => 1, + _ => has_height(rest, target), + }, + } + } + + -- Concatenate the opened rows of every matrix whose log-height is `target` + -- (in matrix order — the stable height-sort preserves it), for the joint leaf + -- hash `hash_iter_slices`. + fn concat_at(rows: List‹List‹U64››, lhs: List‹G›, target: G) -> List‹U64› { + match load(rows) { + ListNode.Nil => store(ListNode.Nil), + ListNode.Cons(r, rrest) => + let &ListNode.Cons(lh, lrest) = lhs; + concat_at_step(eq_zero(lh - target), r, rrest, lrest, target), + } + } + fn concat_at_step(hit: G, r: List‹U64›, rrest: List‹List‹U64››, + lrest: List‹G›, target: G) -> List‹U64› { + match hit { + 0 => concat_at(rrest, lrest, target), + _ => list_concat(r, concat_at(rrest, lrest, target)), + } + } + + -- Canonicalize each lane: the MMCS leaf hash serializes `as_canonical_u64`, + -- but opened base values are on the wire in the (possibly non-canonical) + -- internal Goldilocks repr — e.g. field zero ships as `p`. `gl_reduce` maps + -- them to `< p` before hashing (idempotent on already-canonical lanes). + fn canon_lanes(l: List‹U64›) -> List‹U64› { + match load(l) { + ListNode.Nil => store(ListNode.Nil), + ListNode.Cons(x, rest) => store(ListNode.Cons(gl_reduce(x), canon_lanes(rest))), + } + } + -- The joint leaf hash of all matrices at log-height `target`. + fn leaf_hash_at(rows: List‹List‹U64››, lhs: List‹G›, target: G) -> Digest { + pf_sponge_u64(canon_lanes(concat_at(rows, lhs, target))) + } + + -- Inject the leaf hash of any matrices at log-height `lh` (if present) via a + -- second compression onto `d`. + fn inject_maybe(rows: List‹List‹U64››, lhs: List‹G›, lh: G, d: Digest) -> Digest { + match has_height(lhs, lh) { + 0 => d, + _ => mmcs_compress(d, leaf_hash_at(rows, lhs, lh)), + } + } + + -- Recompose remaining path bits (LSB first) into the cap index. + fn bits_to_num(bits: List‹G›) -> G { + match load(bits) { + ListNode.Nil => 0, + ListNode.Cons(b, rest) => b + 2 * bits_to_num(rest), + } + } + + -- Walk the authentication path: one proof sibling per level (fold), with a + -- possible leaf injection at the new log-height `lh`. Returns the recomputed + -- root and the leftover cap index. + fn mmcs_fold(d: Digest, rows: List‹List‹U64››, lhs: List‹G›, + proof: List‹Digest›, ibits: List‹G›, lh: G) -> (Digest, G) { + match load(proof) { + ListNode.Nil => (d, bits_to_num(ibits)), + ListNode.Cons(s, prest) => + let &ListNode.Cons(bit, brest) = ibits; + let d1 = compress_ordered(bit, d, s); + let d2 = inject_maybe(rows, lhs, lh, d1); + mmcs_fold(d2, rows, lhs, prest, brest, lh - 1), + } + } + + -- Recompute the Merkle root from the opened rows + authentication path. + fn mmcs_root(rows: List‹List‹U64››, lhs: List‹G›, ibits: List‹G›, + proof: List‹Digest›, log_max: G) -> (Digest, G) { + let leaf = leaf_hash_at(rows, lhs, log_max); + mmcs_fold(leaf, rows, lhs, proof, ibits, log_max - 1) + } + + -- 1 iff the recomputed root matches the commitment cap at the cap index. + fn mmcs_verify(cap: MerkleCap, rows: List‹List‹U64››, lhs: List‹G›, + ibits: List‹G›, proof: List‹Digest›, log_max: G) -> G { + let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, log_max); + digest_eq(list_lookup(cap, capidx), root) + } + + -- ========================================================================== + -- FRI fold step (`TwoAdicFriFolding::fold_row`, arity-2 case). + -- + -- `innerFri` uses `maxLogArity = 1`, so every FRI fold is binary. Ports + -- `fold_row` for `log_arity = 1`: given the sibling pair (e0, e1) of a node + -- and the round challenge β, + -- folded = (e0 + e1)/2 + β·(e0 − e1)/(2s), + -- s = g_{log_height+1}^{reverse_bits_len(index, log_height)} (base field) + -- where `g_k = two_adic_gen(k)`. The index is threaded as its low-`log_height` + -- bit list (LSB first), matching `ch_sample_bits`; `reverse_bits_len` is then + -- just reversing that list. + -- ========================================================================== + + -- Reverse a `G` (bit) list onto `acc`. + fn glist_rev(l: List‹G›, acc: List‹G›) -> List‹G› { + match load(l) { + ListNode.Nil => acc, + ListNode.Cons(b, rest) => glist_rev(rest, store(ListNode.Cons(b, acc))), + } + } + + -- base^(Σ bits_i · 2^i), bits LSB-first (square-and-multiply over the bits). + -- `base` is a non-native Goldilocks element; `bits` is a native bit list. + fn exp_by_bits(base: [U8; 8], bits: List‹G›) -> [U8; 8] { + match load(bits) { + ListNode.Nil => gl_one(), + ListNode.Cons(b, rest) => + let half = exp_by_bits(gl_sq(base), rest); + match b { + 0 => half, + _ => gl_mul(base, half), + }, + } + } + + -- The arity-2 FRI fold. `index_bits` = the low `log_height` index bits, LSB + -- first (so `reverse_bits_len` = reversing the list). + fn fri_fold2(index_bits: List‹G›, log_height: G, beta: Ext, e0: Ext, e1: Ext) -> Ext { + let g = two_adic_gen(log_height + 1); + let s = exp_by_bits(g, glist_rev(index_bits, store(ListNode.Nil))); + let two_s = gl_add(s, s); + let t1 = eg_div(eg_add(e0, e1), [gl_two(), gl_zero()]); + let t2 = eg_mul(beta, eg_div(eg_sub(e0, e1), [two_s, gl_zero()])); + eg_add(t1, t2) + } + + -- ========================================================================== + -- `open_input` reduced openings (`fri/verifier.rs::open_input` inner loop). + -- + -- For a matrix opened at a point z with verifier domain point x, accumulate + -- over the matrix columns: + -- ro += alpha_pow · (p_z − p_x) · q ; alpha_pow *= alpha, q = 1/(z − x) + -- where p_x are the INPUT opened base values (from the query's batch opening, + -- authenticated by the input MMCS) and p_z the OOD opened ext values. The + -- query domain point is + -- x = GENERATOR(7) · two_adic_gen(log_height)^{reverse_bits_len(idx, log_height)}. + -- All extension arithmetic — no Merkle hashing here. + -- ========================================================================== + + -- The base-field query domain point x. `index_bits` = low-`log_height` index + -- bits, LSB first (so reverse_bits_len = reversing the list). + fn ro_x(index_bits: List‹G›, log_height: G) -> [U8; 8] { + gl_mul(gl_seven(), exp_by_bits(two_adic_gen(log_height), glist_rev(index_bits, store(ListNode.Nil)))) + } + + -- Accumulate one matrix-point's column contributions. `q = 1/(z − x)`. + fn ro_fold(p_x: List‹[U8; 8]›, p_z: List‹Ext›, q: Ext, alpha: Ext, ro: Ext, ap: Ext) + -> (Ext, Ext) { + match load(p_x) { + ListNode.Nil => (ro, ap), + ListNode.Cons(px, pxr) => + let &ListNode.Cons(pz, pzr) = p_z; + let term = eg_mul(eg_mul(ap, eg_sub(pz, [px, gl_zero()])), q); + ro_fold(pxr, pzr, q, alpha, eg_add(ro, term), eg_mul(ap, alpha)), + } + } + + -- ========================================================================== + -- PCS (FRI) verification — `two_adic_pcs::verify` + `fri::verify_fri`. + -- + -- Specialised to `innerFri`: arity 2 (log_arity = 1 every round), + -- log_blowup = 1, log_final_poly_len = 0 (final_poly is ONE constant + -- coefficient ⇒ the final Horner eval is just `final_poly[0]`, no `x` needed), + -- num_queries = 1, commit/query PoW bits = 0 (a challenger no-op). Field + -- arithmetic is the non-native byte Goldilocks (`gl_*`/`eg_*`). + -- + -- A reduced-opening accumulator, one per distinct log-height. `alpha_pow` + -- threads across every (batch, matrix, point, column) at that height, in the + -- prover's observation order (stage_1, stage_2, quotient, preprocessed). + -- ========================================================================== + enum Bucket { Mk(G, Ext, Ext) } -- log_height, alpha_pow, reduced_opening + + -- ── challenger: observe the opened values (observe_algebra_slice) ────────── + -- One ext element = its two base coordinates, each 8 LE bytes. + fn obs_ext_row(input: ByteStream, row: List‹Ext›) -> ByteStream { + match load(row) { + ListNode.Nil => input, + ListNode.Cons(e, rest) => obs_ext_row(snoc_b8(snoc_b8(input, e[0]), e[1]), rest), + } + } + fn obs_points(input: ByteStream, pts: List‹List‹Ext››) -> ByteStream { + match load(pts) { + ListNode.Nil => input, + ListNode.Cons(row, rest) => obs_points(obs_ext_row(input, row), rest), + } + } + fn obs_round(input: ByteStream, round: OpenedRound) -> ByteStream { + match load(round) { + ListNode.Nil => input, + ListNode.Cons(mat, rest) => obs_round(obs_points(input, mat), rest), + } + } + fn obs_prep(input: ByteStream, prep_opt: PreprocessedOpt) -> ByteStream { + match prep_opt { + PreprocessedOpt.NoPreprocessed => input, + PreprocessedOpt.SomePreprocessed(round) => obs_round(input, round), + } + } + -- Observe one Val (= 1) per FRI round, the variable-arity schedule. + fn obs_log_arities(input: ByteStream, comms: List‹MerkleCap›) -> ByteStream { + match load(comms) { + ListNode.Nil => input, + ListNode.Cons(_c, rest) => + obs_log_arities(snoc_b8(input, [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), rest), + } + } + -- Per round: observe the commitment, then sample β (PoW bits = 0 ⇒ no-op). + -- `GrindingChallenger::check_witness` for one commit round: when `bits > 0`, + -- observe the PoW witness then sample `bits` bits and assert they are all zero + -- (else InvalidPowWitness). Returns the post-PoW `(input, output)` so the + -- immediately-following β sample continues the SAME hash stream (no observe in + -- between). `bits == 0` is the short-circuit: no observe, no sample. + fn pcs_commit_pow(input: ByteStream, witness: U64, bits: G) -> (ByteStream, ByteStream) { + match bits { + 0 => (input, store(ListNode.Nil)), + _ => + let (pbits, i1, o1) = ch_sample_bits(snoc_b8(input, witness), store(ListNode.Nil), bits); + assert_eq!(bits_to_num(pbits), 0); + (i1, o1), + } + } + -- Per round: observe the commitment, run the commit-phase PoW check, then + -- sample the folding challenge β (continuing the challenger past the PoW). + fn pcs_betas(input: ByteStream, comms: List‹MerkleCap›, witnesses: List‹U64›, bits: G) + -> (List‹Ext›, ByteStream) { + match load(comms) { + ListNode.Nil => (store(ListNode.Nil), input), + ListNode.Cons(c, rest) => + let &ListNode.Cons(w, wrest) = witnesses; + let (i1, o1) = pcs_commit_pow(snoc_cap(input, c), w, bits); + let (b0, b1, i2, _o) = ch_sample_ext(i1, o1); + let (bs, i3) = pcs_betas(i2, rest, wrest, bits); + (store(ListNode.Cons([gl_reduce(b0), gl_reduce(b1)], bs)), i3), + } + } + + -- ── reduced-opening buckets ─────────────────────────────────────────────── + fn repeat_g(v: G, n: G) -> List‹G› { + match n { + 0 => store(ListNode.Nil), + _ => store(ListNode.Cons(v, repeat_g(v, n - 1))), + } + } + -- 1 iff some circuit `i < rem` has log-height `log_degrees[i] + log_blowup == h`. + fn circ_has_height(log_degrees: List‹U8›, log_blowup: G, rem: G, i: G, h: G) -> G { + match rem { + 0 => 0, + _ => match eq_zero(to_field(list_lookup(log_degrees, i)) + log_blowup - h) { + 1 => 1, + _ => circ_has_height(log_degrees, log_blowup, rem - 1, i + 1, h), + }, + } + } + -- One bucket per distinct log-height, built DESCENDING by counting `h` down + -- from `log_global_max`. Each starts (alpha_pow = 1, reduced_opening = 0). + fn build_buckets(log_degrees: List‹U8›, log_blowup: G, num_circuits: G, h: G) -> List‹Bucket› { + match h { + 0 => store(ListNode.Nil), + _ => match circ_has_height(log_degrees, log_blowup, num_circuits, 0, h) { + 0 => build_buckets(log_degrees, log_blowup, num_circuits, h - 1), + _ => store(ListNode.Cons( + Bucket.Mk(h, [gl_one(), gl_zero()], [gl_zero(), gl_zero()]), + build_buckets(log_degrees, log_blowup, num_circuits, h - 1))), + }, + } + } + -- Find the bucket at log-height `lh`, fold one matrix-point's columns into it + -- (`ro_fold` threads its `alpha_pow`), and write it back. + fn bucket_update(buckets: List‹Bucket›, lh: G, p_x: List‹[U8; 8]›, p_z: List‹Ext›, + q: Ext, alpha: Ext) -> List‹Bucket› { + match load(buckets) { + ListNode.Nil => store(ListNode.Nil), + ListNode.Cons(b, rest) => + let Bucket.Mk(h, ap, ro) = b; + match eq_zero(h - lh) { + 1 => + let (ro2, ap2) = ro_fold(p_x, p_z, q, alpha, ro, ap); + store(ListNode.Cons(Bucket.Mk(h, ap2, ro2), rest)), + _ => store(ListNode.Cons(b, bucket_update(rest, lh, p_x, p_z, q, alpha))), + }, + } + } + -- A bucket at log-height `log_blowup` would come from a trace matrix of height + -- 1 (a constant polynomial); then `(f(ζ) − f(x))/(ζ − x)` must be 0. Assert it + -- (`open_input`'s `FinalPolyMismatch` guard). No-op if no such bucket exists. + fn assert_blowup_zero(buckets: List‹Bucket›, log_blowup: G) -> G { + match load(buckets) { + ListNode.Nil => 1, + ListNode.Cons(b, rest) => + let Bucket.Mk(h, _ap, ro) = b; + match eq_zero(h - log_blowup) { + 1 => assert_eq!(eg_eq(ro, [gl_zero(), gl_zero()]), 1); 1, + _ => assert_blowup_zero(rest, log_blowup), + }, + } + } + -- 1 iff the proof carries a preprocessed opened round (used for the input-proof + -- batch-count check). + fn prep_count(prep_opt: PreprocessedOpt) -> G { + match prep_opt { + PreprocessedOpt.NoPreprocessed => 0, + PreprocessedOpt.SomePreprocessed(_r) => 1, + } + } + -- Compute x = GENERATOR·g^{revbits} for this height and fold the contribution. + fn ri_apply(buckets: List‹Bucket›, lh: G, idxbits: List‹G›, log_gmax: G, + z: Ext, p_x: List‹[U8; 8]›, p_z: List‹Ext›, alpha: Ext) -> List‹Bucket› { + -- the base opening and the ext opening at this point must have equal width + -- (PointEvaluationCountMismatch); `ro_fold` walks them in lockstep. + assert_eq!(eq_zero(list_length(p_x) - list_length(p_z)), 1); + let x = ro_x(list_drop(idxbits, log_gmax - lh), lh); + let q = eg_inverse(eg_sub(z, [x, gl_zero()])); + bucket_update(buckets, lh, p_x, p_z, q, alpha) + } + + -- A stage_1/stage_2/preprocessed-style matrix: two opening points + -- (ζ, ζ·g) with the same base row `p_x`. `g` = trace subgroup generator. + fn open_2pt_mat(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, lh: G, + ldeg: G, zeta: Ext, p_x: List‹[U8; 8]›, mat: List‹List‹Ext››, alpha: Ext) + -> List‹Bucket› { + let pz0 = list_lookup(mat, 0); + let pz1 = list_lookup(mat, 1); + let zn = eg_mul(zeta, [two_adic_gen(ldeg), gl_zero()]); + let b1 = ri_apply(buckets, lh, idxbits, log_gmax, zeta, p_x, pz0, alpha); + ri_apply(b1, lh, idxbits, log_gmax, zn, p_x, pz1, alpha) + } + fn open_batch_2pt(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G, + rem: G, log_degrees: List‹U8›, zeta: Ext, base_rows: List‹List‹U64››, + opened: OpenedRound, alpha: Ext) -> List‹Bucket› { + match rem { + 0 => buckets, + _ => + let ldeg = to_field(list_lookup(log_degrees, ci)); + let b = open_2pt_mat(buckets, idxbits, log_gmax, ldeg + log_blowup, ldeg, zeta, + list_lookup(base_rows, ci), list_lookup(opened, ci), alpha); + open_batch_2pt(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, log_degrees, zeta, + base_rows, opened, alpha), + } + } + + -- The quotient batch: `qd` chunks per circuit, one opening point (ζ) each. + fn open_q_chunks(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, lh: G, + chunk: G, qrem: G, zeta: Ext, base_rows: List‹List‹U64››, + q_opened: OpenedRound, alpha: Ext) -> (List‹Bucket›, G) { + match qrem { + 0 => (buckets, chunk), + _ => + let b = ri_apply(buckets, lh, idxbits, log_gmax, zeta, + list_lookup(base_rows, chunk), list_lookup(list_lookup(q_opened, chunk), 0), alpha); + open_q_chunks(b, idxbits, log_gmax, lh, chunk + 1, qrem - 1, zeta, base_rows, q_opened, alpha), + } + } + fn open_quotient(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G, + rem: G, chunk: G, circuits: List‹SysCircuit›, log_degrees: List‹U8›, zeta: Ext, + base_rows: List‹List‹U64››, q_opened: OpenedRound, alpha: Ext) -> List‹Bucket› { + match rem { + 0 => buckets, + _ => + let SysCircuit.Mk(_a, _cc, md, _ph, _pw, _w1, _w2) = list_lookup(circuits, ci); + let qd = quotient_degree_of(md); + let lh = to_field(list_lookup(log_degrees, ci)) + log_blowup; + let (b, chunk2) = open_q_chunks(buckets, idxbits, log_gmax, lh, chunk, qd, zeta, base_rows, q_opened, alpha); + open_quotient(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, chunk2, circuits, log_degrees, zeta, base_rows, q_opened, alpha), + } + } + + -- The preprocessed batch: only circuits with `prep_indices[i] = Some(j)`; + -- `k` tracks the position in the preprocessed commitment (= base-row index). + fn open_prep(buckets: List‹Bucket›, idxbits: List‹G›, log_gmax: G, log_blowup: G, ci: G, rem: G, + k: G, log_degrees: List‹U8›, prep_indices: List‹OptIdx›, zeta: Ext, + base_rows: List‹List‹U64››, prep_round: OpenedRound, alpha: Ext) -> List‹Bucket› { + match rem { + 0 => buckets, + _ => match list_lookup(prep_indices, ci) { + OptIdx.NoIdx => + open_prep(buckets, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, k, log_degrees, + prep_indices, zeta, base_rows, prep_round, alpha), + OptIdx.SomeIdx(_j) => + let ldeg = to_field(list_lookup(log_degrees, ci)); + let b = open_2pt_mat(buckets, idxbits, log_gmax, ldeg + log_blowup, ldeg, zeta, + list_lookup(base_rows, k), list_lookup(prep_round, k), alpha); + open_prep(b, idxbits, log_gmax, log_blowup, ci + 1, rem - 1, k + 1, log_degrees, + prep_indices, zeta, base_rows, prep_round, alpha), + }, + } + } + fn open_prep_batch(buckets: List‹Bucket›, input_proof: List‹BatchOpening›, + prep_commit: MerkleCap, prep_opt: PreprocessedOpt, prep_indices: List‹OptIdx›, + log_degrees: List‹U8›, num_circuits: G, idxbits: List‹G›, log_gmax: G, log_blowup: G, + zeta: Ext, alpha: Ext) -> List‹Bucket› { + match prep_opt { + PreprocessedOpt.NoPreprocessed => buckets, + PreprocessedOpt.SomePreprocessed(prep_round) => + let BatchOpening.Mk(rows_p, proof_p) = list_lookup(input_proof, 3); + -- one opened base row per preprocessed matrix (BatchOpenedValuesCountMismatch) + assert_eq!(eq_zero(list_length(rows_p) - list_length(prep_round)), 1); + assert_eq!(mmcs_verify(prep_commit, rows_p, + heights_prep(log_degrees, log_blowup, prep_indices, num_circuits, 0), idxbits, proof_p, log_gmax), 1); + open_prep(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, 0, log_degrees, + prep_indices, zeta, rows_p, prep_round, alpha), + } + } + + -- ── per-batch input-MMCS matrix log-heights (`log_degree + log_blowup`) ──── + fn heights_all(log_degrees: List‹U8›, log_blowup: G, rem: G, i: G) -> List‹G› { + match rem { + 0 => store(ListNode.Nil), + _ => store(ListNode.Cons(to_field(list_lookup(log_degrees, i)) + log_blowup, + heights_all(log_degrees, log_blowup, rem - 1, i + 1))), + } + } + fn heights_quotient(circuits: List‹SysCircuit›, log_degrees: List‹U8›, log_blowup: G, rem: G, i: G) -> List‹G› { + match rem { + 0 => store(ListNode.Nil), + _ => + let SysCircuit.Mk(_a, _cc, md, _ph, _pw, _w1, _w2) = list_lookup(circuits, i); + list_concat(repeat_g(to_field(list_lookup(log_degrees, i)) + log_blowup, quotient_degree_of(md)), + heights_quotient(circuits, log_degrees, log_blowup, rem - 1, i + 1)), + } + } + fn heights_prep(log_degrees: List‹U8›, log_blowup: G, prep_indices: List‹OptIdx›, rem: G, i: G) -> List‹G› { + match rem { + 0 => store(ListNode.Nil), + _ => match list_lookup(prep_indices, i) { + OptIdx.NoIdx => heights_prep(log_degrees, log_blowup, prep_indices, rem - 1, i + 1), + OptIdx.SomeIdx(_j) => + store(ListNode.Cons(to_field(list_lookup(log_degrees, i)) + log_blowup, + heights_prep(log_degrees, log_blowup, prep_indices, rem - 1, i + 1))), + }, + } + } + + -- ── FRI fold chain (`verify_query`, arity 2) ────────────────────────────── + -- Reconstruct the sibling pair: evals[index_in_group] = folded, other = sib. + fn recon_evals(bit: G, folded: Ext, sib: Ext) -> (Ext, Ext) { + match bit { + 0 => (folded, sib), + _ => (sib, folded), + } + } + -- Flatten two ext evals to the 4 base coords of the ExtensionMmcs leaf row. + fn flatten2(e0: Ext, e1: Ext) -> List‹U64› { + store(ListNode.Cons(e0[0], store(ListNode.Cons(e0[1], + store(ListNode.Cons(e1[0], store(ListNode.Cons(e1[1], store(ListNode.Nil))))))))) + } + -- Roll the next reduced opening into the folded eval when its height matches + -- the new folded height: `folded += beta^(2^log_arity) · ro` (log_arity = 1). + fn rollin(folded: Ext, log_folded: G, beta: Ext, ro_rest: List‹Bucket›) -> (Ext, List‹Bucket›) { + match load(ro_rest) { + ListNode.Nil => (folded, ro_rest), + ListNode.Cons(b, rest) => + let Bucket.Mk(h, _ap, ro) = b; + match eq_zero(h - log_folded) { + 1 => (eg_add(folded, eg_mul(ext_exp_pow2(beta, 1), ro)), rest), + _ => (folded, ro_rest), + }, + } + } + fn verify_query(folded: Ext, betas: List‹Ext›, comms: List‹MerkleCap›, + openings: List‹CommitPhaseProofStep›, domidx: List‹G›, log_cur: G, + ro_rest: List‹Bucket›, log_final: G) -> Ext { + match load(openings) { + ListNode.Nil => + -- must have folded down to exactly the final domain size, and every + -- reduced opening must have been rolled in (FinalFoldHeightMismatch / + -- UnconsumedReducedOpenings). + assert_eq!(eq_zero(log_cur - log_final), 1); + assert_eq!(list_length(ro_rest), 0); + folded, + ListNode.Cons(op, op_rest) => + let &ListNode.Cons(beta, beta_rest) = betas; + let &ListNode.Cons(comm, comm_rest) = comms; + let CommitPhaseProofStep.Mk(_la, sibs, oproof) = op; + -- arity 2 ⇒ exactly arity-1 = 1 sibling (SiblingValuesLengthMismatch). + assert_eq!(list_length(sibs), 1); + let &ListNode.Cons(ibit, idrest) = domidx; -- index_in_group = LSB + let log_folded = log_cur - 1; + let (e0, e1) = recon_evals(ibit, folded, list_lookup(sibs, 0)); + -- authenticate the sibling pair against this round's commitment + assert_eq!(mmcs_verify(comm, store(ListNode.Cons(flatten2(e0, e1), store(ListNode.Nil))), + store(ListNode.Cons(log_folded, store(ListNode.Nil))), idrest, oproof, log_folded), 1); + let folded1 = fri_fold2(idrest, log_folded, beta, e0, e1); + let (folded2, ro_rest2) = rollin(folded1, log_folded, beta, ro_rest); + verify_query(folded2, beta_rest, comm_rest, op_rest, idrest, log_folded, ro_rest2, log_final), + } + } + + -- ── one FRI query ───────────────────────────────────────────────────────── + -- For the query index `idxbits`: build the reduced-opening accumulators, + -- authenticate each input batch (input MMCS), run the fold chain, and check + -- the final polynomial. `log_final = log_blowup` (log_final_poly_len = 0). + fn verify_one_query(idxbits: List‹G›, qp: QueryProof, alpha: Ext, + stage1: OpenedRound, stage2: OpenedRound, q_opened: OpenedRound, + prep_opt: PreprocessedOpt, s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap, + prep_commit: MerkleCap, circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›, + log_degrees: List‹U8›, zeta: Ext, num_circuits: G, log_blowup: G, log_gmax: G, + betas: List‹Ext›, commit_phase_commits: List‹MerkleCap›, final_poly: List‹Ext›, + num_rounds: G) -> G { + let QueryProof.Mk(input_proof, commit_phase_openings) = qp; + -- one commit-phase opening per round (QueryCommitPhaseOpeningsCountMismatch), + -- one input batch per commitment (InputProofBatchCountMismatch). + assert_eq!(eq_zero(list_length(commit_phase_openings) - num_rounds), 1); + assert_eq!(eq_zero(list_length(input_proof) - (3 + prep_count(prep_opt))), 1); + let buckets = build_buckets(log_degrees, log_blowup, num_circuits, log_gmax); + let BatchOpening.Mk(rows_s1, proof_s1) = list_lookup(input_proof, 0); + assert_eq!(eq_zero(list_length(rows_s1) - num_circuits), 1); + assert_eq!(mmcs_verify(s1c, rows_s1, heights_all(log_degrees, log_blowup, num_circuits, 0), idxbits, proof_s1, log_gmax), 1); + let buckets = open_batch_2pt(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, log_degrees, zeta, rows_s1, stage1, alpha); + let BatchOpening.Mk(rows_s2, proof_s2) = list_lookup(input_proof, 1); + assert_eq!(eq_zero(list_length(rows_s2) - num_circuits), 1); + assert_eq!(mmcs_verify(s2c, rows_s2, heights_all(log_degrees, log_blowup, num_circuits, 0), idxbits, proof_s2, log_gmax), 1); + let buckets = open_batch_2pt(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, log_degrees, zeta, rows_s2, stage2, alpha); + let BatchOpening.Mk(rows_q, proof_q) = list_lookup(input_proof, 2); + assert_eq!(eq_zero(list_length(rows_q) - list_length(q_opened)), 1); + assert_eq!(mmcs_verify(qc, rows_q, heights_quotient(circuits, log_degrees, log_blowup, num_circuits, 0), idxbits, proof_q, log_gmax), 1); + let buckets = open_quotient(buckets, idxbits, log_gmax, log_blowup, 0, num_circuits, 0, circuits, log_degrees, zeta, rows_q, q_opened, alpha); + let buckets = open_prep_batch(buckets, input_proof, prep_commit, prep_opt, prep_indices, log_degrees, num_circuits, idxbits, log_gmax, log_blowup, zeta, alpha); + -- a height-`log_blowup` (constant-poly) reduced opening must be zero + let _cz = assert_blowup_zero(buckets, log_blowup); + -- the first reduced opening must sit at log_global_max_height + -- (InitialReducedOpeningHeightMismatch). + let &ListNode.Cons(b0, ro_rest) = buckets; + let Bucket.Mk(h0, _ap0, folded_start) = b0; + assert_eq!(eq_zero(h0 - log_gmax), 1); + let folded = verify_query(folded_start, betas, commit_phase_commits, commit_phase_openings, idxbits, log_gmax, ro_rest, log_blowup); + -- final check: with log_final_poly_len = 0, eval = final_poly[0] + assert_eq!(eg_eq(list_lookup(final_poly, 0), folded), 1); + 1 + } + + -- Loop over all `num_queries` query proofs, sampling one index per query + -- (consecutive `sample_bits` continue the same challenger stream). + fn query_loop(input: ByteStream, output: ByteStream, query_proofs: List‹QueryProof›, + alpha: Ext, stage1: OpenedRound, stage2: OpenedRound, q_opened: OpenedRound, + prep_opt: PreprocessedOpt, s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap, + prep_commit: MerkleCap, circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›, + log_degrees: List‹U8›, zeta: Ext, num_circuits: G, log_blowup: G, log_gmax: G, + betas: List‹Ext›, commit_phase_commits: List‹MerkleCap›, final_poly: List‹Ext›, + num_rounds: G) -> G { + match load(query_proofs) { + ListNode.Nil => 1, + ListNode.Cons(qp, rest) => + let (idxbits, input2, output2) = ch_sample_bits(input, output, log_gmax); + let _q = verify_one_query(idxbits, qp, alpha, stage1, stage2, q_opened, prep_opt, + s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta, num_circuits, + log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds); + query_loop(input2, output2, rest, alpha, stage1, stage2, q_opened, prep_opt, + s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta, num_circuits, + log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds), + } + } + + -- ── top-level FRI verification ──────────────────────────────────────────── + -- `log_blowup` comes from the verifying key; `num_queries` / `commit_pow_bits` + -- are the protocol's FRI parameters (public). `query_pow_bits` is taken to be 0 + -- (our system), so the query-phase grinding check is a no-op. + fn pcs_fri_verify(post_zeta_input: ByteStream, stage1: OpenedRound, stage2: OpenedRound, + q_opened: OpenedRound, prep_opt: PreprocessedOpt, opening: FriProof, + s1c: MerkleCap, s2c: MerkleCap, qc: MerkleCap, prep_commit: MerkleCap, + circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›, log_degrees: List‹U8›, + zeta: Ext, num_circuits: G, log_blowup: G, num_queries: G, commit_pow_bits: G) -> G { + let FriProof.Mk(commit_phase_commits, pw, query_proofs, final_poly, _qpw) = opening; + let num_rounds = list_length(commit_phase_commits); + -- FRI shape: one PoW witness per round, num_queries query proofs, and (since + -- log_final_poly_len = 0) a single final-poly coefficient. + assert_eq!(eq_zero(list_length(pw) - num_rounds), 1); + assert_eq!(eq_zero(list_length(query_proofs) - num_queries), 1); + assert_eq!(list_length(final_poly), 1); + -- challenger continuation: observe all opened values (coms_to_verify order) + let input = obs_round(post_zeta_input, stage1); + let input = obs_round(input, stage2); + let input = obs_round(input, q_opened); + let input = obs_prep(input, prep_opt); + -- PCS batch-combination challenge α + let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil)); + let alpha = [gl_reduce(a0), gl_reduce(a1)]; + -- per-round FRI fold challenges β (with commit-phase PoW), then observe + -- final_poly + the log-arity schedule. + let (betas, input) = pcs_betas(input, commit_phase_commits, pw, commit_pow_bits); + let input = obs_ext_row(input, final_poly); + let input = obs_log_arities(input, commit_phase_commits); + -- query indices + per-query verification (log_global_max_height = #rounds + log_blowup) + let log_gmax = num_rounds + log_blowup; + query_loop(input, store(ListNode.Nil), query_proofs, alpha, stage1, stage2, q_opened, + prep_opt, s1c, s2c, qc, prep_commit, circuits, prep_indices, log_degrees, zeta, + num_circuits, log_blowup, log_gmax, betas, commit_phase_commits, final_poly, num_rounds) + } + +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/SystemDeserialize.lean b/Ix/MultiStark/SystemDeserialize.lean new file mode 100644 index 00000000..fbbbc046 --- /dev/null +++ b/Ix/MultiStark/SystemDeserialize.lean @@ -0,0 +1,286 @@ +module +public import Ix.Aiur.Meta +public import Ix.IxVM.Core +public import Ix.IxVM.ByteStream +public import Ix.MultiStark.Deserialize + +/-! +# Verifying-key deserializer (Aiur) + +Aiur port of `src/aiur/vk_codec.rs::manual_deserialize` — reconstructs the +verifier's `System` from the bytes the prover places on the IO +channel. Same bincode wire format the Rust side validated against serde: + +* enum tag : `u32`, 4 bytes LE +* `Option` : 1 tag byte (`0` = None, `1` = Some) +* `usize`/`u64` : 8 bytes LE +* `Vec` : `u64` length, then elements +* struct : fields in declaration order +* `Range` : `start`, `end` (two `u64`) +* `MerkleCap` : `Vec<[u64; 4]>` +* Goldilocks `G` : raw `u64`, 8 bytes LE (reduced mod p on read) + +Reuses the proof deserializer's byte primitives (`read_u8`, `read_u64`, +`read_count`, `read_merkle_cap`, `limb_to_field`, `Digest`, `MerkleCap`). +-/ + +public section + +namespace MultiStark + +def systemDeserialize := ⟦ + -- ========================================================================== + -- Reconstructed `System` as Aiur data. + -- ========================================================================== + + -- `SymbolicVariable.entry` (a column reference within the window). + enum SysEntry { + Preprocessed(G), -- offset + Main(G), + Stage2(G), + Public, + Stage2Public, + Challenge + } + + -- `SymbolicExpression` — the AIR constraint tree. Children are pointers; + -- the trailing `G` on Add/Sub/Neg/Mul is the cached `degree_multiple`. + enum SymExpr { + Var(SysEntry, G), -- entry, index + IsFirstRow, + IsLastRow, + IsTransition, + Const([U8; 8]), -- a non-native Goldilocks constant (canonical LE bytes) + Add(&SymExpr, &SymExpr, G), + Sub(&SymExpr, &SymExpr, G), + Neg(&SymExpr, G), + Mul(&SymExpr, &SymExpr, G) + } + + enum SysLookup { Mk(SymExpr, List‹SymExpr›) } -- multiplicity, args + + enum SysConstraints { Mk(List‹SymExpr›, G, G, G) } -- zeros, sel_start, sel_end, width + + enum SysMemory { Mk(G) } -- width + + enum SysAir { + Function(SysConstraints), + Memory(SysMemory), + Bytes1, + Bytes2 + } + + enum SysLookupAir { Mk(SysAir, List‹SysLookup›) } -- inner_air, lookups + + -- air, constraint_count, max_constraint_degree, preprocessed_height, + -- preprocessed_width, stage_1_width, stage_2_width. + enum SysCircuit { Mk(SysLookupAir, G, G, G, G, G, G) } + + enum SysParams { Mk(G, G) } -- log_blowup, cap_height + + -- `Option`s as dedicated non-generic enums (unambiguous constructors). + enum OptCommit { NoCommit, SomeCommit(MerkleCap) } + enum OptIdx { NoIdx, SomeIdx(G) } + + -- commitment_parameters, circuits, preprocessed_commit, preprocessed_indices. + enum Sys { Mk(SysParams, List‹SysCircuit›, OptCommit, List‹OptIdx›) } + + -- ========================================================================== + -- Byte primitives specific to the VK format. + -- ========================================================================== + + -- A `u32` enum tag: 4 little-endian bytes folded into a field element. + fn read_u32(stream: ByteStream) -> (G, ByteStream) { + let (b0, s0) = read_u8(stream); + let (b1, s1) = read_u8(s0); + let (b2, s2) = read_u8(s1); + let (b3, s3) = read_u8(s2); + (to_field(b0) + 0x100 * to_field(b1) + 0x10000 * to_field(b2) + + 0x1000000 * to_field(b3), s3) + } + + -- A raw `u64` Goldilocks constant, canonicalized into non-native Goldilocks + -- bytes (`gl_reduce`) so it can feed the byte-emulated composition arithmetic. + fn read_field(stream: ByteStream) -> ([U8; 8], ByteStream) { + let (u, s) = read_u64(stream); + (gl_reduce(u), s) + } + + -- ========================================================================== + -- Recursive readers (mirror `vk_codec` reader-by-reader). + -- ========================================================================== + + fn read_sys_entry(stream: ByteStream) -> (SysEntry, ByteStream) { + let (tag, s) = read_u32(stream); + match tag { + 0 => let (o, s1) = read_count(s); (SysEntry.Preprocessed(o), s1), + 1 => let (o, s1) = read_count(s); (SysEntry.Main(o), s1), + 2 => let (o, s1) = read_count(s); (SysEntry.Stage2(o), s1), + 3 => (SysEntry.Public, s), + 4 => (SysEntry.Stage2Public, s), + _ => (SysEntry.Challenge, s), + } + } + + fn read_sym_expr(stream: ByteStream) -> (SymExpr, ByteStream) { + let (tag, s) = read_u32(stream); + match tag { + 0 => + let (e, s1) = read_sys_entry(s); + let (i, s2) = read_count(s1); + (SymExpr.Var(e, i), s2), + 1 => (SymExpr.IsFirstRow, s), + 2 => (SymExpr.IsLastRow, s), + 3 => (SymExpr.IsTransition, s), + 4 => let (c, s1) = read_field(s); (SymExpr.Const(c), s1), + 5 => + let (x, s1) = read_sym_expr(s); + let (y, s2) = read_sym_expr(s1); + let (d, s3) = read_count(s2); + (SymExpr.Add(store(x), store(y), d), s3), + 6 => + let (x, s1) = read_sym_expr(s); + let (y, s2) = read_sym_expr(s1); + let (d, s3) = read_count(s2); + (SymExpr.Sub(store(x), store(y), d), s3), + 7 => + let (x, s1) = read_sym_expr(s); + let (d, s2) = read_count(s1); + (SymExpr.Neg(store(x), d), s2), + _ => + let (x, s1) = read_sym_expr(s); + let (y, s2) = read_sym_expr(s1); + let (d, s3) = read_count(s2); + (SymExpr.Mul(store(x), store(y), d), s3), + } + } + + fn read_sym_exprs(stream: ByteStream) -> (List‹SymExpr›, ByteStream) { + let (n, s) = read_count(stream); + read_sym_exprs_n(s, n) + } + fn read_sym_exprs_n(stream: ByteStream, n: G) -> (List‹SymExpr›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_sym_expr(stream); + let (rest, s2) = read_sym_exprs_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_sys_lookup(stream: ByteStream) -> (SysLookup, ByteStream) { + let (m, s) = read_sym_expr(stream); + let (args, s2) = read_sym_exprs(s); + (SysLookup.Mk(m, args), s2) + } + fn read_sys_lookups(stream: ByteStream) -> (List‹SysLookup›, ByteStream) { + let (n, s) = read_count(stream); + read_sys_lookups_n(s, n) + } + fn read_sys_lookups_n(stream: ByteStream, n: G) -> (List‹SysLookup›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_sys_lookup(stream); + let (rest, s2) = read_sys_lookups_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_sys_constraints(stream: ByteStream) -> (SysConstraints, ByteStream) { + let (zeros, s) = read_sym_exprs(stream); + let (sel_start, s1) = read_count(s); + let (sel_end, s2) = read_count(s1); + let (width, s3) = read_count(s2); + (SysConstraints.Mk(zeros, sel_start, sel_end, width), s3) + } + + fn read_sys_air(stream: ByteStream) -> (SysAir, ByteStream) { + let (tag, s) = read_u32(stream); + match tag { + 0 => let (c, s1) = read_sys_constraints(s); (SysAir.Function(c), s1), + 1 => let (w, s1) = read_count(s); (SysAir.Memory(SysMemory.Mk(w)), s1), + 2 => (SysAir.Bytes1, s), + _ => (SysAir.Bytes2, s), + } + } + + fn read_sys_lookupair(stream: ByteStream) -> (SysLookupAir, ByteStream) { + let (inner, s) = read_sys_air(stream); + let (lookups, s1) = read_sys_lookups(s); + (SysLookupAir.Mk(inner, lookups), s1) + } + + fn read_sys_circuit(stream: ByteStream) -> (SysCircuit, ByteStream) { + let (air, s) = read_sys_lookupair(stream); + let (cc, s1) = read_count(s); + let (md, s2) = read_count(s1); + let (ph, s3) = read_count(s2); + let (pw, s4) = read_count(s3); + let (w1, s5) = read_count(s4); + let (w2, s6) = read_count(s5); + (SysCircuit.Mk(air, cc, md, ph, pw, w1, w2), s6) + } + fn read_sys_circuits(stream: ByteStream) -> (List‹SysCircuit›, ByteStream) { + let (n, s) = read_count(stream); + read_sys_circuits_n(s, n) + } + fn read_sys_circuits_n(stream: ByteStream, n: G) -> (List‹SysCircuit›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_sys_circuit(stream); + let (rest, s2) = read_sys_circuits_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + -- `Option` tag is a single byte (bincode special-cases Option). + fn read_opt_commit(stream: ByteStream) -> (OptCommit, ByteStream) { + let (tag, s) = read_u8(stream); + match tag { + 0 => (OptCommit.NoCommit, s), + _ => let (c, s1) = read_merkle_cap(s); (OptCommit.SomeCommit(c), s1), + } + } + fn read_opt_idx(stream: ByteStream) -> (OptIdx, ByteStream) { + let (tag, s) = read_u8(stream); + match tag { + 0 => (OptIdx.NoIdx, s), + _ => let (i, s1) = read_count(s); (OptIdx.SomeIdx(i), s1), + } + } + fn read_opt_idx_list(stream: ByteStream) -> (List‹OptIdx›, ByteStream) { + let (n, s) = read_count(stream); + read_opt_idx_list_n(s, n) + } + fn read_opt_idx_list_n(stream: ByteStream, n: G) -> (List‹OptIdx›, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (x, s) = read_opt_idx(stream); + let (rest, s2) = read_opt_idx_list_n(s, n - 1); + (store(ListNode.Cons(x, rest)), s2), + } + } + + fn read_sys_params(stream: ByteStream) -> (SysParams, ByteStream) { + let (log_blowup, s) = read_count(stream); + let (cap_height, s1) = read_count(s); + (SysParams.Mk(log_blowup, cap_height), s1) + } + + -- Full `System`. + fn read_system(stream: ByteStream) -> (Sys, ByteStream) { + let (params, s) = read_sys_params(stream); + let (circuits, s1) = read_sys_circuits(s); + let (commit, s2) = read_opt_commit(s1); + let (indices, s3) = read_opt_idx_list(s2); + (Sys.Mk(params, circuits, commit, indices), s3) + } +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/Tests.lean b/Ix/MultiStark/Tests.lean new file mode 100644 index 00000000..80148123 --- /dev/null +++ b/Ix/MultiStark/Tests.lean @@ -0,0 +1,294 @@ +module +public import Ix.Aiur.Meta + +/-! +# Self-tests for the Multi-STARK recursive verifier + +The `pub fn *_test` entrypoints validating the verifier's primitives against +Rust reference values (`multi-stark/src/types.rs` test outputs). They live in a +SEPARATE toplevel fragment, merged on top of the production verifier toplevel +(`MultiStark.multiStarkTests` in `Ix/MultiStark.lean`): every `pub fn` adds a +circuit to the compiled system, so keeping the tests out of +`MultiStark.multiStark` keeps the production verifier's width free of +test-only circuits. + +Cross-fragment references (e.g. `gl_add`, `mmcs_hash_row`, `fri_fold2`, +`ch_sample_bits`) resolve after the merge — the merged toplevel has a flat +namespace. +-/ + +public section + +namespace MultiStark + +def tests := ⟦ + -- ========================================================================== + -- Non-native Goldilocks byte arithmetic (vs `gl_ops_ref`). + -- ========================================================================== + fn assert_g8(x: Goldilocks, e: Goldilocks) -> G { + assert_eq!(to_field(x[0]), to_field(e[0])); + assert_eq!(to_field(x[1]), to_field(e[1])); + assert_eq!(to_field(x[2]), to_field(e[2])); + assert_eq!(to_field(x[3]), to_field(e[3])); + assert_eq!(to_field(x[4]), to_field(e[4])); + assert_eq!(to_field(x[5]), to_field(e[5])); + assert_eq!(to_field(x[6]), to_field(e[6])); + assert_eq!(to_field(x[7]), to_field(e[7])); + 1 + } + pub fn gl_addsub_test() -> G { + let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210 + let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0 + assert_eq!(assert_g8(gl_add(a, b), [255u8, 16u8, 17u8, 17u8, 18u8, 17u8, 17u8, 17u8]), 1); + assert_eq!(assert_g8(gl_sub(a, b), [32u8, 83u8, 151u8, 219u8, 31u8, 100u8, 168u8, 236u8]), 1); + assert_eq!(assert_g8(gl_sub(b, a), [225u8, 172u8, 104u8, 36u8, 223u8, 155u8, 87u8, 19u8]), 1); + -- edge: (p-1) + 5 ≡ 4 ; 5 - (p-1) ≡ 6 + let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8]; -- 0xFFFFFFFF00000000 + let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + assert_eq!(assert_g8(gl_add(pm1, five), [4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1); + assert_eq!(assert_g8(gl_sub(five, pm1), [6u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]), 1); + 1 + } + + fn assert_eg(x: ExtGoldilocks, e0: Goldilocks, e1: Goldilocks) -> G { + assert_eq!(assert_g8(x[0], e0), 1); + assert_eq!(assert_g8(x[1], e1), 1); + 1 + } + pub fn gl_muldiv_test() -> G { + let a = [16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8]; -- 0xFEDCBA9876543210 + let b = [240u8, 222u8, 188u8, 154u8, 120u8, 86u8, 52u8, 18u8]; -- 0x123456789ABCDEF0 + assert_eq!(assert_g8(gl_mul(a, b), [212u8, 186u8, 123u8, 108u8, 31u8, 253u8, 234u8, 250u8]), 1); + assert_eq!(assert_g8(gl_inverse(a), [97u8, 29u8, 109u8, 46u8, 183u8, 100u8, 8u8, 102u8]), 1); + assert_eq!(assert_g8(gl_div(a, b), [63u8, 59u8, 61u8, 54u8, 46u8, 255u8, 29u8, 186u8]), 1); + -- edge: (p-1)·5 ≡ p-5 + let pm1 = [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8]; + let five = [5u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + assert_eq!(assert_g8(gl_mul(pm1, five), [252u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8]), 1); + -- a·a⁻¹ = 1 and b·b⁻¹ = 1 + assert_eq!(assert_g8(gl_mul(a, gl_inverse(a)), gl_one()), 1); + assert_eq!(assert_g8(gl_mul(b, gl_inverse(b)), gl_one()), 1); + 1 + } + pub fn eg_ops_test() -> G { + -- e0 = (0xFEDCBA9876543210, 0x0123456789ABCDEF), e1 = (0x1111111122222222, 0x3333333344444444) + let e0 = [[16u8, 50u8, 84u8, 118u8, 152u8, 186u8, 220u8, 254u8], + [239u8, 205u8, 171u8, 137u8, 103u8, 69u8, 35u8, 1u8]]; + let e1 = [[34u8, 34u8, 34u8, 34u8, 17u8, 17u8, 17u8, 17u8], + [68u8, 68u8, 68u8, 68u8, 51u8, 51u8, 51u8, 51u8]]; + assert_eq!(assert_eg(eg_add(e0, e1), + [49u8, 84u8, 118u8, 152u8, 170u8, 203u8, 237u8, 15u8], + [51u8, 18u8, 240u8, 205u8, 154u8, 120u8, 86u8, 52u8]), 1); + assert_eq!(assert_eg(eg_mul(e0, e1), + [10u8, 238u8, 162u8, 36u8, 224u8, 127u8, 182u8, 134u8], + [215u8, 234u8, 152u8, 224u8, 219u8, 254u8, 32u8, 67u8]), 1); + assert_eq!(assert_eg(eg_inverse(e0), + [221u8, 238u8, 29u8, 131u8, 179u8, 89u8, 214u8, 216u8], + [114u8, 99u8, 206u8, 108u8, 15u8, 88u8, 161u8, 246u8]), 1); + assert_eq!(assert_eg(eg_div(e0, e1), + [42u8, 59u8, 64u8, 77u8, 226u8, 214u8, 95u8, 63u8], + [200u8, 46u8, 148u8, 147u8, 124u8, 180u8, 248u8, 140u8]), 1); + -- e0 · e0⁻¹ = 1 + assert_eq!(assert_eg(eg_mul(e0, eg_inverse(e0)), gl_one(), gl_zero()), 1); + 1 + } + + -- ========================================================================== + -- Challenger (vs `pcs_challenger_ref` / `pcs_challenger4_ref`). + -- ========================================================================== + + -- Self-test: `sample_bits(20)` after observing the single `Val` + -- `0x0102030405060708` (8 LE bytes) must equal the reference `799146` + -- (`pcs_challenger_ref` in `multi-stark/src/types.rs`). + pub fn sample_bits_test() -> G { + let input = store(ListNode.Cons(8u8, store(ListNode.Cons(7u8, store(ListNode.Cons(6u8, + store(ListNode.Cons(5u8, store(ListNode.Cons(4u8, store(ListNode.Cons(3u8, + store(ListNode.Cons(2u8, store(ListNode.Cons(1u8, store(ListNode.Nil))))))))))))))))); + let (bits, _i, _o) = ch_sample_bits(input, store(ListNode.Nil), 20); + assert_eq!(bits_to_num(bits), 799146); + 1 + } + + -- Self-test: replay the synthetic PCS challenger sequence from + -- `pcs_challenger4_ref` (multi-stark/src/types.rs) and check every sampled + -- challenge against the reference. Decisively exercises the two consecutive + -- ext samples (α_pcs then α_fri) sharing one hash `output` stream. + pub fn pcs_challenger4_test() -> G { + -- post-ζ input buffer = observe V0 then V1 (forward / observation order). + let v0 = [8u8, 7u8, 6u8, 5u8, 4u8, 3u8, 2u8, 1u8]; -- 0x0102030405060708 + let v1 = [136u8, 119u8, 102u8, 85u8, 68u8, 51u8, 34u8, 17u8]; -- 0x1122334455667788 + let input = snoc_b8(snoc_b8(store(ListNode.Nil), v0), v1); + -- α_pcs (output empty ⇒ flush), then α_fri (CONSECUTIVE ⇒ thread output). + let (apcs, input, o1) = pcs_sample_ext(input, store(ListNode.Nil)); + let (afri, input, o2) = pcs_sample_ext(input, o1); + assert_eq!(limb_to_field(apcs[0]), 2882912772410685996); + assert_eq!(limb_to_field(apcs[1]), 910933442133595775); + assert_eq!(limb_to_field(afri[0]), 14440140149289897216); + assert_eq!(limb_to_field(afri[1]), 8092267645441512944); + -- observe commit (clears output), sample β. + let v2 = [239u8, 190u8, 173u8, 222u8, 0u8, 0u8, 0u8, 0u8]; -- 0x00000000deadbeef + let (input, _oc) = ch_observe_val(input, v2); + let (beta, input, _ob) = pcs_sample_ext(input, store(ListNode.Nil)); + assert_eq!(limb_to_field(beta[0]), 10456048119516576995); + assert_eq!(limb_to_field(beta[1]), 3173538015651228593); + -- observe final_poly coeff + log_arity (each a Val), then sample the index. + let v3 = [4u8, 3u8, 2u8, 1u8, 13u8, 12u8, 11u8, 10u8]; -- 0x0a0b0c0d01020304 + let v4 = [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; -- 0x0000000000000002 + let (input, _o3) = ch_observe_val(input, v3); + let (input, _o4) = ch_observe_val(input, v4); + let (bits, _bi, _bo) = ch_sample_bits(input, store(ListNode.Nil), 20); + assert_eq!(bits_to_num(bits), 336138); + 1 + } + + -- ========================================================================== + -- FRI fold + reduced openings (vs `fri_fold_ref` / `ro_fold_ref`). + -- ========================================================================== + + -- Self-test: arity-2 fold at index 5, log_height 3 vs the `fri_fold_ref` + -- reference (computed by the real `TwoAdicFriFolding::fold_row`). + pub fn fri_fold_test() -> G { + let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0, + store(ListNode.Cons(1, store(ListNode.Nil))))))); + let e0 = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8], + [34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8, 34u8]]; + let e1 = [[51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8, 51u8], + [68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8, 68u8]]; + let beta = [[85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8, 85u8], + [102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8, 102u8]]; + let folded = fri_fold2(index_bits, 3, beta, e0, e1); + assert_eq!(limb_to_field(folded[0]), 9349172584842537206); + assert_eq!(limb_to_field(folded[1]), 984486879173118962); + 1 + } + + -- Self-test vs `ro_fold_ref`: x at index 5 / log_height 3, then accumulate + -- (p_z − p_x)/(z − x) over 3 columns with alpha powers. + pub fn ro_fold_test() -> G { + let index_bits = store(ListNode.Cons(1, store(ListNode.Cons(0, + store(ListNode.Cons(1, store(ListNode.Nil))))))); + let x = ro_x(index_bits, 3); + assert_eq!(limb_to_field(x), 117440512); + let z = [[154u8, 120u8, 86u8, 52u8, 18u8, 0u8, 0u8, 0u8], + [1u8, 239u8, 205u8, 171u8, 0u8, 0u8, 0u8, 0u8]]; + let alpha = [[17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8, 17u8], + [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]]; + let px0 = [11u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + let px1 = [22u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + let px2 = [33u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]; + let p_x = store(ListNode.Cons(px0, store(ListNode.Cons(px1, + store(ListNode.Cons(px2, store(ListNode.Nil))))))); + let pz0 = [[100u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]]; + let pz1 = [[200u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]]; + let pz2 = [[44u8, 1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], [3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8]]; + let p_z = store(ListNode.Cons(pz0, store(ListNode.Cons(pz1, + store(ListNode.Cons(pz2, store(ListNode.Nil))))))); + let q = eg_inverse(eg_sub(z, [x, gl_zero()])); + let (ro, _ap) = ro_fold(p_x, p_z, q, alpha, [gl_zero(), gl_zero()], [gl_one(), gl_zero()]); + assert_eq!(limb_to_field(ro[0]), 7130765474285082575); + assert_eq!(limb_to_field(ro[1]), 12254464995725315436); + 1 + } + + -- ========================================================================== + -- Keccak MMCS sponge/compression + Merkle verify_batch + -- (vs `pcs_ref_values` / `pcs_merkle_ref`). Compares each output lane mod p + -- via `limb_to_field` (all reference lanes are canonical, so this is exact). + -- ========================================================================== + + -- A small single-byte value as a `U64` (8 LE bytes). + fn u64_of(b: U8) -> U64 { + [b, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] + } + -- `[i+1, i+2, …, n]` as a `List‹U64›`. + fn build_range(i: G, n: G) -> List‹U64› { + match n - i { + 0 => store(ListNode.Nil), + _ => store(ListNode.Cons(u64_of(u8_from_field_unsafe(i + 1)), build_range(i + 1, n))), + } + } + fn assert_digest(d: Digest, e0: G, e1: G, e2: G, e3: G) -> G { + assert_eq!(limb_to_field(d[0]), e0); + assert_eq!(limb_to_field(d[1]), e1); + assert_eq!(limb_to_field(d[2]), e2); + assert_eq!(limb_to_field(d[3]), e3); + 1 + } + pub fn pcs_hash_test() -> G { + -- LEAF3: hash([1,2,3]) + let d3 = mmcs_hash_row(build_range(0, 3)); + assert_eq!(assert_digest(d3, 0xc55a6a1beaea9fec, 0xc8f0dbc4c59ec440, + 0xacb1295de9bfe032, 0x445d569d3dfc9543), 1); + -- LEAF17: exactly one full block, no extra permute. + let d17 = mmcs_hash_row(build_range(0, 17)); + assert_eq!(assert_digest(d17, 0x388da73622e8fdd5, 0xec687be9c50d2218, + 0x528d145dfe6571af, 0xd2eb808dfba4703c), 1); + -- LEAF22: full block + 5-element partial (two permutes), >20 lanes. + let d22 = mmcs_hash_row(build_range(0, 22)); + assert_eq!(assert_digest(d22, 520358013996801752, 12301199992631688477, + 8732686820159480415, 10883226686987971725), 1); + -- LEAF20: full block + 3-element partial (two permutes). + let d20 = mmcs_hash_row(build_range(0, 20)); + assert_eq!(assert_digest(d20, 0xec696847be88d358, 0x202861c67ff4cec8, + 0x88e006a48aaa0661, 0xabaddb9d32ecd024), 1); + -- COMPRESS([1,2,3,4],[5,6,7,8]) + let c = mmcs_compress([u64_of(1u8), u64_of(2u8), u64_of(3u8), u64_of(4u8)], + [u64_of(5u8), u64_of(6u8), u64_of(7u8), u64_of(8u8)]); + assert_eq!(assert_digest(c, 0xda1ef0642722b22e, 0x4851efdbdb2a2fd8, + 0x37e8ff900ea95d47, 0xa153eee7805376fb), 1); + 1 + } + + -- Merkle `verify_batch` self-test against the `pcs_merkle_ref` reference: a + -- cap-height-0 tree over 3 matrices of heights 8/4/2 (log-heights 3/2/1), + -- opened at index 5 (path bits 1,0,1). Checks the recomputed root matches the + -- committed root and that the cap index is 0, then that a tampered opened row + -- yields a different (rejected) root. + pub fn pcs_merkle_test() -> G { + -- opened rows (matrix order): m0 row5, m1 row2, m2 row1. + let row0 = store(ListNode.Cons(u64_of(11u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil))))); + let row1 = store(ListNode.Cons(u64_of(107u8), store(ListNode.Cons(u64_of(108u8), + store(ListNode.Cons(u64_of(109u8), store(ListNode.Nil))))))); + let row2 = store(ListNode.Cons(u64_of(202u8), store(ListNode.Nil))); + let rows = store(ListNode.Cons(row0, store(ListNode.Cons(row1, + store(ListNode.Cons(row2, store(ListNode.Nil))))))); + -- log-heights and path bits (index 5 = 0b101, LSB first). + let lhs = store(ListNode.Cons(3, store(ListNode.Cons(2, store(ListNode.Cons(1, store(ListNode.Nil))))))); + let ibits = store(ListNode.Cons(1, store(ListNode.Cons(0, store(ListNode.Cons(1, store(ListNode.Nil))))))); + -- authentication path SIB0, SIB1, SIB2 (each a Digest = [U64; 4]). + let sib0 = [[9u8, 36u8, 179u8, 127u8, 205u8, 83u8, 105u8, 203u8], + [95u8, 229u8, 105u8, 223u8, 113u8, 55u8, 97u8, 122u8], + [135u8, 8u8, 65u8, 248u8, 163u8, 163u8, 68u8, 81u8], + [9u8, 11u8, 20u8, 209u8, 10u8, 168u8, 151u8, 125u8]]; + let sib1 = [[227u8, 58u8, 255u8, 213u8, 77u8, 152u8, 42u8, 77u8], + [113u8, 86u8, 2u8, 151u8, 97u8, 63u8, 58u8, 45u8], + [228u8, 139u8, 228u8, 194u8, 182u8, 115u8, 107u8, 221u8], + [248u8, 16u8, 30u8, 93u8, 176u8, 36u8, 205u8, 88u8]]; + let sib2 = [[236u8, 144u8, 115u8, 218u8, 140u8, 5u8, 86u8, 229u8], + [95u8, 186u8, 252u8, 175u8, 21u8, 247u8, 153u8, 25u8], + [113u8, 78u8, 92u8, 200u8, 212u8, 175u8, 247u8, 47u8], + [78u8, 145u8, 206u8, 54u8, 175u8, 155u8, 165u8, 206u8]]; + let proof = store(ListNode.Cons(sib0, store(ListNode.Cons(sib1, + store(ListNode.Cons(sib2, store(ListNode.Nil))))))); + let (root, capidx) = mmcs_root(rows, lhs, ibits, proof, 3); + assert_eq!(capidx, 0); + assert_eq!(assert_digest(root, 0x6211b9a1a116a006, 0x435ee98e1504880f, + 0x900c7274b9a215f, 0xf6e3aaac5dcd90bd), 1); + -- tamper: perturb m0's first opened value → root must change. + let bad0 = store(ListNode.Cons(u64_of(99u8), store(ListNode.Cons(u64_of(12u8), store(ListNode.Nil))))); + let bad_rows = store(ListNode.Cons(bad0, store(ListNode.Cons(row1, + store(ListNode.Cons(row2, store(ListNode.Nil))))))); + let cap = store(ListNode.Cons([[6u8, 160u8, 22u8, 161u8, 161u8, 185u8, 17u8, 98u8], + [15u8, 136u8, 4u8, 21u8, 142u8, 233u8, 94u8, 67u8], + [95u8, 33u8, 154u8, 75u8, 39u8, 199u8, 0u8, 9u8], + [189u8, 144u8, 205u8, 93u8, 172u8, 170u8, 227u8, 246u8]], + store(ListNode.Nil))); + assert_eq!(mmcs_verify(cap, rows, lhs, ibits, proof, 3), 1); + assert_eq!(mmcs_verify(cap, bad_rows, lhs, ibits, proof, 3), 0); + 1 + } +⟧ + +end MultiStark + +end diff --git a/Ix/MultiStark/Verifier.lean b/Ix/MultiStark/Verifier.lean new file mode 100644 index 00000000..e3207e31 --- /dev/null +++ b/Ix/MultiStark/Verifier.lean @@ -0,0 +1,816 @@ +module +public import Ix.Aiur.Meta +public import Ix.IxVM.Core +public import Ix.IxVM.ByteStream +public import Ix.MultiStark.Deserialize +public import Ix.MultiStark.Keccak +public import Ix.MultiStark.Pcs +public import Ix.MultiStark.SystemDeserialize + +/-! +# Multi-STARK verifier (Aiur) + +Reimplementation of `multi-stark/src/verifier.rs` (`System::verify_multiple_claims`) +over the deserialized `Proof` (`Ix/MultiStark/Deserialize.lean`). + +The Rust verifier runs these steps: + +1. **Shape check** — proof array dimensions match the system's circuit count and + column widths. +2. **Accumulator balance** — the last intermediate accumulator is zero (all + lookup pushes/pulls cancel). +3. **Fiat-Shamir replay** — reconstruct the Keccak challenger: observe + commitments / trace heights / claims, sample (lookup, fingerprint, α, ζ). +4. **PCS verification** — FRI opening proofs (see `Ix/MultiStark/Pcs.lean`). +5. **OOD evaluation** — recompute the composition polynomial at ζ and check + `composition(ζ) · inv_vanishing(ζ) == quotient(ζ)`. + +### Implemented here +* Step 1 (the system-independent part): the proof is internally consistent — + `stage_1`, `stage_2` and `intermediate_accumulators` all have the same length + (the circuit count) and it is non-zero. +* Step 2: accumulator balance — the last `intermediate_accumulator` is the zero + extension element. +* Step 3: the Fiat-Shamir challenger replay (`fiat_shamir`). Prover-faithful: + observes the verifying key's preprocessed commitment, the stage_1 commitment, + the trace heights, and the public claims (in that order), then samples and + re-observes the lookup/fingerprint challenges, observes stage_2, samples α, + observes the quotient commitment, and samples ζ — matching + `verify_multiple_claims` byte-for-byte. +* Step 5: the out-of-domain composition/quotient check (`ood_verify`). For each + circuit it recomputes `composition(ζ)` by replaying the AIR constraint folder + (`VerifierConstraintFolder` + `LookupAir::eval`) over the deserialized + symbolic system and the opened values, recomputes `quotient(ζ)` from the + opened quotient chunks (barycentric `zps` weights over the split quotient + domains), and asserts `composition(ζ) · inv_vanishing(ζ) == quotient(ζ)`. + Validated end-to-end against a real factorial proof (`RecursiveVerifier.lean`): + the verifier accepts the honest proof and rejects a tampered claim. + +### Stubbed / TODO +* Base-field samples are rejection-sampled (`ch_sample_field`): a raw 8-byte + limb in the band `[p, 2⁶⁴)` (probability ≈ 2⁻³²) is discarded and redrawn, + consuming challenger bytes exactly as `SerializingChallenger64::sample` does. +* The PCS opening proof (`pcs_verify`) is still an accept-stub. With the PCS + stubbed, this verifier checks every algebraic relation except FRI proximity, + so it is not yet fully sound. +-/ + +public section + +namespace MultiStark + +def verifier := ⟦ + -- An extension element `[c0, c1]` (`= c0 + c1·X`) is zero iff both Goldilocks + -- coefficients are zero. (`read_ext` already reduced the limbs mod p.) + fn ext_is_zero(e: Ext) -> G { + gl_is_zero(e[0]) * gl_is_zero(e[1]) + } + + -- 1 iff the LAST element of the accumulator list is the zero extension + -- element (Rust: `intermediate_accumulators.last() == Some(ExtVal::ZERO)`). + -- The empty list returns 0 (there is no last element to balance). + fn last_acc_is_zero(accs: List‹Ext›) -> G { + match load(accs) { + ListNode.Nil => 0, + ListNode.Cons(e, rest) => + match load(rest) { + ListNode.Nil => ext_is_zero(e), + _ => last_acc_is_zero(rest), + }, + } + } + + -- ========================================================================== + -- Fiat-Shamir challenger: `SerializingChallenger64>`. The inner byte challenger keeps an `input` buffer; a + -- `sample` with empty `output` flushes (`input := output := keccak256(input)`) + -- and pops bytes from the END of the hash output. The outer layer serializes + -- field elements as 8 little-endian bytes and samples field elements as + -- 8-byte little-endian u64s. + -- + -- The challenger is threaded as a pair `(input, output)` of byte lists, where + -- `output` is held in pop order (front = next byte = hash byte 31, 30, …). + -- ========================================================================== + + -- Cons 8 bytes (LSB-first) of `b` onto `tail` (one byte list segment). + fn b8_onto(b: [U8; 8], tail: ByteStream) -> ByteStream { + store(ListNode.Cons(b[0], store(ListNode.Cons(b[1], store(ListNode.Cons(b[2], + store(ListNode.Cons(b[3], store(ListNode.Cons(b[4], store(ListNode.Cons(b[5], + store(ListNode.Cons(b[6], store(ListNode.Cons(b[7], tail)))))))))))))))) + } + + -- A digest = `[u64; 4]` = 32 bytes (each limb little-endian) onto `tail`. + fn digest_onto(d: Digest, tail: ByteStream) -> ByteStream { + b8_onto(d[0], b8_onto(d[1], b8_onto(d[2], b8_onto(d[3], tail)))) + } + + -- A commitment (`MerkleCap` = `Vec`): all digest bytes, onto `tail`. + fn cap_onto(cap: MerkleCap, tail: ByteStream) -> ByteStream { + match load(cap) { + ListNode.Nil => tail, + ListNode.Cons(d, rest) => digest_onto(d, cap_onto(rest, tail)), + } + } + + -- Observe `log_degrees`: each is a `Val::from_u8`, i.e. 8 LE bytes `[ld,0,…]`. + fn log_degrees_onto(lds: List‹U8›, tail: ByteStream) -> ByteStream { + match load(lds) { + ListNode.Nil => tail, + ListNode.Cons(ld, rest) => + b8_onto([ld, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], log_degrees_onto(rest, tail)), + } + } + + -- Reverse `l` onto `acc` (used to put a hash output into pop order). + fn rev_onto(l: ByteStream, acc: ByteStream) -> ByteStream { + match load(l) { + ListNode.Nil => acc, + ListNode.Cons(b, rest) => rev_onto(rest, store(ListNode.Cons(b, acc))), + } + } + + -- Sample one byte. If `output` is empty, flush: hash the `input` buffer, set + -- the new input to the 32 hash bytes, and refill `output` (in pop order). + fn ch_sample_byte(input: ByteStream, output: ByteStream) -> (U8, ByteStream, ByteStream) { + match load(output) { + ListNode.Cons(b, rest) => (b, input, rest), + ListNode.Nil => + let h = keccak256(input); + let fwd = digest_onto(h, store(ListNode.Nil)); + let rev = rev_onto(fwd, store(ListNode.Nil)); + let &ListNode.Cons(b, rest) = rev; + (b, fwd, rest), + } + } + + -- Sample 8 bytes = `sample_array()` (for one base-field element, LE). + fn ch_sample8(input: ByteStream, output: ByteStream) -> ([U8; 8], ByteStream, ByteStream) { + let (b0, i0, o0) = ch_sample_byte(input, output); + let (b1, i1, o1) = ch_sample_byte(i0, o0); + let (b2, i2, o2) = ch_sample_byte(i1, o1); + let (b3, i3, o3) = ch_sample_byte(i2, o2); + let (b4, i4, o4) = ch_sample_byte(i3, o3); + let (b5, i5, o5) = ch_sample_byte(i4, o4); + let (b6, i6, o6) = ch_sample_byte(i5, o5); + let (b7, i7, o7) = ch_sample_byte(i6, o6); + ([b0, b1, b2, b3, b4, b5, b6, b7], i7, o7) + } + + -- Sample one base-field element with REJECTION SAMPLING, mirroring + -- `SerializingChallenger64::sample`'s inner loop: draw 8 bytes as a LE u64 + -- (the `log2_ceil(p) = 64` mask is a no-op for Goldilocks); if the raw value + -- is ≥ p (probability ≈ 2⁻³²), DISCARD it and draw the next 8 bytes — a + -- rejected draw consumes challenger bytes, shifting every later sample, + -- exactly as in the reference. `raw < p` ⟺ `sub8(raw, p)` borrows. The + -- accepted limb is canonical (< p) by construction. + fn ch_sample_field(input: ByteStream, output: ByteStream) -> ([U8; 8], ByteStream, ByteStream) { + let (raw, i1, o1) = ch_sample8(input, output); + let (_d, borrow) = sub8(raw, gl_p()); + match borrow { + 1 => (raw, i1, o1), + _ => ch_sample_field(i1, o1), + } + } + + -- Sample a degree-2 extension element: two base samples (`from_basis_*`), + -- each rejection-sampled, returning their 8-byte LE limbs (canonical, but + -- also re-observable as raw bytes) and the threaded challenger. + fn ch_sample_ext(input: ByteStream, output: ByteStream) -> ([U8; 8], [U8; 8], ByteStream, ByteStream) { + let (c0, i0, o0) = ch_sample_field(input, output); + let (c1, i1, o1) = ch_sample_field(i0, o0); + (c0, c1, i1, o1) + } + + -- `sample_bits(n)` (FRI query index). `SerializingChallenger64::sample_bits` + -- reads one 8-byte sample as a little-endian u64 and masks the low `n` bits. + -- We return the low `n` bits as a list (LSB first = the leaf→root Merkle/FRI + -- path), built from the 8 sampled bytes' bit decompositions (reusing keccak's + -- `cons8`), truncated to `n`. + fn sample8_bits(bytes: [U8; 8]) -> List‹G› { + cons8(u8_bit_decomposition(bytes[0]), + cons8(u8_bit_decomposition(bytes[1]), + cons8(u8_bit_decomposition(bytes[2]), + cons8(u8_bit_decomposition(bytes[3]), + cons8(u8_bit_decomposition(bytes[4]), + cons8(u8_bit_decomposition(bytes[5]), + cons8(u8_bit_decomposition(bytes[6]), + cons8(u8_bit_decomposition(bytes[7]), store(ListNode.Nil))))))))) + } + fn take_bits(bits: List‹G›, n: G) -> List‹G› { + match n { + 0 => store(ListNode.Nil), + _ => + let &ListNode.Cons(b, rest) = bits; + store(ListNode.Cons(b, take_bits(rest, n - 1))), + } + } + fn ch_sample_bits(input: ByteStream, output: ByteStream, n: G) + -> (List‹G›, ByteStream, ByteStream) { + let (bytes, i1, o1) = ch_sample8(input, output); + (take_bits(sample8_bits(bytes), n), i1, o1) + } + + -- Append (observe) 8 little-endian bytes of `b` at the END of the challenger + -- input buffer. The transcript is held front-to-back (front = first observed = + -- first hashed, matching `keccak256`'s absorption order), so an observation + -- appends — `b8_onto` PREPENDS, hence the `list_concat`. + fn snoc_b8(input: ByteStream, b: [U8; 8]) -> ByteStream { + list_concat(input, b8_onto(b, store(ListNode.Nil))) + } + -- Append (observe) a commitment (`MerkleCap`) at the end of the buffer. + fn snoc_cap(input: ByteStream, cap: MerkleCap) -> ByteStream { + list_concat(input, cap_onto(cap, store(ListNode.Nil))) + } + + -- ========================================================================== + -- PCS challenger continuation (Phase 4): the post-ζ transcript that + -- `two_adic_pcs::verify` + `verify_fri` replay. Unlike `fiat_shamir` — where + -- every sample is followed by an observe (so each sample re-flushes from an + -- empty `output`) — the PCS phase has *consecutive* samples with no observe + -- between (the PCS batch challenge α, then immediately the FRI batch challenge + -- α). So both challenger buffers must be threaded: `output` carries the + -- leftover hash bytes from one sample into the next instead of re-flushing. + -- ========================================================================== + + -- Observe one `Val` (8 LE bytes): append to `input`, CLEAR `output` (any + -- leftover sampled bytes are discarded), per `HashChallenger::observe`. + fn ch_observe_val(input: ByteStream, v: U64) -> (ByteStream, ByteStream) { + (snoc_b8(input, v), store(ListNode.Nil)) + } + + -- Sample a degree-2 extension element, threading BOTH challenger buffers so a + -- following consecutive sample continues from the same hash `output` stream + -- (no re-flush). Limbs are rejection-sampled (`ch_sample_field`), so they are + -- canonical; the `gl_reduce` is a no-op kept for type/intent clarity. + fn pcs_sample_ext(input: ByteStream, output: ByteStream) + -> (Ext, ByteStream, ByteStream) { + let (c0, c1, i1, o1) = ch_sample_ext(input, output); + ([gl_reduce(c0), gl_reduce(c1)], i1, o1) + } + + -- Append a claim's values (each `Val` as 8 LE bytes) onto `tail`, in order. + fn claim_vals_onto(vals: List‹U64›, tail: ByteStream) -> ByteStream { + match load(vals) { + ListNode.Nil => tail, + ListNode.Cons(v, rest) => b8_onto(v, claim_vals_onto(rest, tail)), + } + } + -- Append every claim's values (`challenger.observe_slice(claim)` per claim). + fn claims_onto(claims: List‹List‹U64››, tail: ByteStream) -> ByteStream { + match load(claims) { + ListNode.Nil => tail, + ListNode.Cons(c, rest) => claim_vals_onto(c, claims_onto(rest, tail)), + } + } + + -- The preprocessed commitment cap from the verifying key, or an empty cap + -- (observes nothing) when there is none. + fn opt_commit_cap(commit: OptCommit) -> MerkleCap { + match commit { + OptCommit.NoCommit => store(ListNode.Nil), + OptCommit.SomeCommit(c) => c, + } + } + + -- Replay the verifier transcript and derive the four challenges + -- `(lookup, fingerprint, alpha, zeta)`. Mirrors `verify_multiple_claims`'s + -- challenger sequence exactly: + -- observe preprocessed_commit (if any) → stage_1 → log_degrees → claims; + -- sample lookup, observe it; sample fingerprint, observe it; + -- observe stage_2; sample α; observe quotient; sample ζ. + -- `observe` clears the challenger's output buffer, and every sample here is + -- preceded by an observe, so each `ch_sample_ext` re-flushes from an empty + -- output (hence the `store(ListNode.Nil)` output argument each time). + -- Every sample is rejection-sampled (`ch_sample_field` inside + -- `ch_sample_ext`), so a limb in the band `[p, 2⁶⁴)` is redrawn exactly as in + -- the reference challenger, and the limbs observed back are canonical. + -- Also returns the post-ζ challenger `input` buffer, which the PCS phase + -- (Phase 4+) continues observing into. The leftover `output` after the ζ + -- sample is discarded — the next challenger op is an observe (of the opened + -- values), which clears `output` anyway. + fn fiat_shamir(prep: MerkleCap, s1: MerkleCap, s2: MerkleCap, q: MerkleCap, + lds: List‹U8›, claims: List‹List‹U64››) -> (Ext, Ext, Ext, Ext, ByteStream) { + -- Initial transcript, front-to-back: prep, stage_1, log_degrees, claims. + -- Built inner-to-outer with the prepend helpers so the result is in + -- forward (observation) order. + let input = claims_onto(claims, store(ListNode.Nil)); + let input = log_degrees_onto(lds, input); + let input = cap_onto(s1, input); + let input = cap_onto(prep, input); + -- sample lookup challenge, then observe it back (append) + let (l0, l1, input, _ol) = ch_sample_ext(input, store(ListNode.Nil)); + let input = snoc_b8(snoc_b8(input, l0), l1); + -- sample fingerprint challenge, then observe it back + let (f0, f1, input, _of) = ch_sample_ext(input, store(ListNode.Nil)); + let input = snoc_b8(snoc_b8(input, f0), f1); + -- observe stage_2 commitment + let input = snoc_cap(input, s2); + -- sample constraint challenge α (not observed) + let (a0, a1, input, _oa) = ch_sample_ext(input, store(ListNode.Nil)); + -- observe quotient commitment + let input = snoc_cap(input, q); + -- sample out-of-domain point ζ; keep the resulting `input` for the PCS phase + let (z0, z1, zinput, _oz) = ch_sample_ext(input, store(ListNode.Nil)); + ([gl_reduce(l0), gl_reduce(l1)], + [gl_reduce(f0), gl_reduce(f1)], + [gl_reduce(a0), gl_reduce(a1)], + [gl_reduce(z0), gl_reduce(z1)], + zinput) + } + + -- ========================================================================== + -- OOD evaluation domain math (`TwoAdicMultiplicativeCoset`, Goldilocks). + -- The trace domain for a circuit of size 2^L is the order-2^L subgroup H + -- (shift = 1) generated by `two_adic_gen(L)`. + -- ========================================================================== + + -- e^(2^k) in the extension field. + fn ext_exp_pow2(e: Ext, k: G) -> Ext { + match k { + 0 => e, + _ => ext_exp_pow2(eg_mul(e, e), k - 1), + } + } + + -- `two_adic_generator(bits)` — a primitive 2^bits root of unity in Goldilocks + -- (`Plonky3/goldilocks/src/goldilocks.rs::TWO_ADIC_GENERATORS`). + fn two_adic_gen(bits: G) -> [U8; 8] { + match bits { + 0 => [1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8], + 1 => [0u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8], + 2 => [0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8], + 3 => [1u8, 0u8, 0u8, 255u8, 254u8, 255u8, 255u8, 255u8], + 4 => [1u8, 0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 239u8], + 5 => [0u8, 192u8, 255u8, 255u8, 255u8, 63u8, 0u8, 0u8], + 6 => [0u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8], + 7 => [1u8, 0u8, 0u8, 8u8, 255u8, 7u8, 0u8, 248u8], + 8 => [102u8, 169u8, 12u8, 230u8, 60u8, 20u8, 121u8, 191u8], + 9 => [78u8, 31u8, 65u8, 92u8, 42u8, 208u8, 5u8, 25u8], + 10 => [114u8, 217u8, 254u8, 139u8, 215u8, 42u8, 143u8, 157u8], + 11 => [207u8, 200u8, 161u8, 29u8, 128u8, 180u8, 83u8, 6u8], + 12 => [182u8, 252u8, 157u8, 149u8, 153u8, 81u8, 195u8, 242u8], + 13 => [151u8, 121u8, 209u8, 53u8, 35u8, 239u8, 68u8, 21u8], + 14 => [226u8, 161u8, 187u8, 16u8, 147u8, 9u8, 238u8, 224u8], + 15 => [172u8, 186u8, 6u8, 35u8, 254u8, 207u8, 178u8, 246u8], + 16 => [14u8, 69u8, 121u8, 191u8, 48u8, 150u8, 223u8, 84u8], + 17 => [14u8, 138u8, 61u8, 170u8, 232u8, 166u8, 208u8, 171u8], + 18 => [172u8, 190u8, 249u8, 5u8, 123u8, 26u8, 40u8, 129u8], + 19 => [2u8, 51u8, 170u8, 140u8, 107u8, 28u8, 212u8, 251u8], + 20 => [109u8, 231u8, 147u8, 94u8, 205u8, 46u8, 186u8, 48u8], + 21 => [84u8, 38u8, 50u8, 50u8, 245u8, 174u8, 2u8, 245u8], + 22 => [181u8, 70u8, 114u8, 230u8, 173u8, 24u8, 42u8, 75u8], + 23 => [139u8, 201u8, 251u8, 54u8, 19u8, 90u8, 157u8, 234u8], + 24 => [113u8, 225u8, 7u8, 195u8, 49u8, 204u8, 205u8, 134u8], + 25 => [216u8, 239u8, 207u8, 110u8, 151u8, 245u8, 186u8, 75u8], + 26 => [134u8, 226u8, 214u8, 120u8, 91u8, 208u8, 65u8, 237u8], + 27 => [29u8, 23u8, 90u8, 145u8, 216u8, 141u8, 215u8, 16u8], + 28 => [133u8, 68u8, 74u8, 0u8, 0u8, 149u8, 4u8, 89u8], + 29 => [102u8, 38u8, 109u8, 164u8, 59u8, 201u8, 168u8, 223u8], + 30 => [69u8, 8u8, 106u8, 184u8, 9u8, 208u8, 155u8, 126u8], + 31 => [89u8, 230u8, 136u8, 85u8, 117u8, 127u8, 10u8, 64u8], + _ => [140u8, 135u8, 88u8, 218u8, 220u8, 41u8, 86u8, 24u8], + } + } + + -- Vanishing polynomial of the trace domain (shift = 1, size 2^L) at point ζ: + -- `Z_H(ζ) = ζ^(2^L) - 1`. + fn trace_vanishing(zeta: Ext, l: G) -> Ext { + eg_sub(ext_exp_pow2(zeta, l), [gl_one(), gl_zero()]) + } + + -- Lagrange selectors at ζ for the trace domain (shift = 1), mirroring + -- `domain.rs::selectors_at_point`: + -- is_first_row = Z_H(ζ) / (ζ - 1) + -- is_last_row = Z_H(ζ) / (ζ - g⁻¹) + -- is_transition = ζ - g⁻¹ + -- inv_vanishing = 1 / Z_H(ζ) + -- where g = two_adic_gen(L) is the subgroup generator. + fn trace_selectors(zeta: Ext, l: G) -> (Ext, Ext, Ext, Ext) { + let zh = trace_vanishing(zeta, l); + let ginv = gl_inverse(two_adic_gen(l)); + let is_first = eg_div(zh, eg_sub(zeta, [gl_one(), gl_zero()])); + let is_last = eg_div(zh, eg_sub(zeta, [ginv, gl_zero()])); + let is_trans = eg_sub(zeta, [ginv, gl_zero()]); + let inv_van = eg_inverse(zh); + (is_first, is_last, is_trans, inv_van) + } + + -- Structural + accumulator + PCS checks of a deserialized proof (steps 1, 2, + -- 4). Fiat-Shamir (step 3) and the OOD check (step 5) live in `ood_verify`, + -- which needs the verifying key and the claims. + -- + -- Returns 1 on success; `assert_eq!` aborts the (proof) execution on any + -- failed check, exactly as the Rust verifier returns `Err`. + fn verify(proof: Proof) -> G { + match proof { + Proof.Mk(_commitments, accs, _log_degrees, _opening, + _quotient, _preprocessed, stage_1, stage_2) => + -- Step 1 (shape, system-independent): the per-round opened-value lists + -- and the accumulator list all have the same length = the circuit count. + let num_circuits = list_length(accs); + -- there must be at least one circuit (Rust: InvalidSystem) + assert_eq!(eq_zero(num_circuits), 0); + assert_eq!(list_length(stage_1), num_circuits); + assert_eq!(list_length(stage_2), num_circuits); + + -- Step 2: accumulator balance — the last accumulator must be zero. + assert_eq!(last_acc_is_zero(accs), 1); + -- Step 4 (PCS/FRI) now runs inside `ood_verify`, which has the verifying + -- key, the challenger continuation, and the opened values it needs. + 1, + } + } + + -- ========================================================================== + -- Step 5: out-of-domain (OOD) evaluation. + -- + -- Mirrors the per-circuit loop in `verifier.rs::verify_multiple_claims` + -- (lines 329-434). For each circuit it recomputes the composition polynomial + -- `composition(ζ)` from the opened values by replaying the AIR constraint + -- folder (`VerifierConstraintFolder` + `LookupAir::eval`), recomputes the + -- quotient `quotient(ζ)` from the opened quotient chunks via the barycentric + -- weights `zps`, and asserts + -- composition(ζ) · inv_vanishing(ζ) == quotient(ζ). + -- + -- The challenges (lookup, fingerprint, α, ζ) come from `fiat_shamir` above. + -- As with `fiat_shamir`, claims are not observed; this assumes the no-claims + -- case, so the running lookup accumulator starts at the zero extension + -- element (`acc` in Rust = ExtVal::ZERO). + -- ========================================================================== + + -- One Horner fold step of the constraint folder: `acc := acc·α + x` + -- (`VerifierConstraintFolder::assert_zero` / `assert_zero_ext`). + fn ood_fold(acc: Ext, alpha: Ext, x: Ext) -> Ext { + eg_add(eg_mul(acc, alpha), x) + } + + -- Reconstruct an extension element from its two opened base coordinates, + -- `from_ext_basis([c0, c1]) = c0 + c1·X` (the ExtVal basis is `[1, X]`). + fn from_ext_basis(c0: Ext, c1: Ext) -> Ext { + eg_add(c0, eg_mul(c1, [gl_zero(), gl_one()])) + } + + -- A stage-2 / quotient opened row arrives as `stage_2_width·2` extension + -- coordinates; fold consecutive pairs back into `stage_2_width` extension + -- elements (Rust: `chunks_exact(2).map(from_ext_basis)`). + fn reconstruct_ext_row(raw: List‹Ext›) -> List‹Ext› { + match load(raw) { + ListNode.Nil => store(ListNode.Nil), + ListNode.Cons(c0, t1) => + let ListNode.Cons(c1, t2) = load(t1); + store(ListNode.Cons(from_ext_basis(c0, c1), reconstruct_ext_row(t2))), + } + } + + -- Evaluate a symbolic AIR expression at the opened point (Rust: + -- `SymbolicExpression::interpret`). Function-circuit `zeros` and lookup + -- args/multiplicities only ever reference `Main`/`Preprocessed` entries + -- (offset 0) and constants; the other entries never appear here. + fn eval_sym(e: SymExpr, main: List‹Ext›, prep: List‹Ext›) -> Ext { + match e { + SymExpr.Var(entry, idx) => + match entry { + SysEntry.Main(_o) => list_lookup(main, idx), + SysEntry.Preprocessed(_o) => list_lookup(prep, idx), + SysEntry.Stage2(_o) => [gl_zero(), gl_zero()], + SysEntry.Public => [gl_zero(), gl_zero()], + SysEntry.Stage2Public => [gl_zero(), gl_zero()], + SysEntry.Challenge => [gl_zero(), gl_zero()], + }, + SymExpr.IsFirstRow => [gl_zero(), gl_zero()], + SymExpr.IsLastRow => [gl_zero(), gl_zero()], + SymExpr.IsTransition => [gl_zero(), gl_zero()], + SymExpr.Const(c) => [c, gl_zero()], + SymExpr.Add(x, y, _d) => eg_add(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)), + SymExpr.Sub(x, y, _d) => eg_sub(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)), + SymExpr.Neg(x, _d) => eg_neg(eval_sym(load(x), main, prep)), + SymExpr.Mul(x, y, _d) => eg_mul(eval_sym(load(x), main, prep), eval_sym(load(y), main, prep)), + } + } + + -- `fingerprint(r, args) = Σ argᵢ·rⁱ` (Horner over the reversed argument list, + -- `lookup.rs::fingerprint`). + fn fingerprint_ext(r: Ext, args: List‹SymExpr›, main: List‹Ext›, prep: List‹Ext›) -> Ext { + match load(args) { + ListNode.Nil => [gl_zero(), gl_zero()], + ListNode.Cons(a, rest) => + eg_add(eval_sym(a, main, prep), eg_mul(r, fingerprint_ext(r, rest, main, prep))), + } + } + + -- Fold the inner-AIR `zeros` constraints (Function circuit): each is asserted + -- zero on the main (stage-1) row, with no preprocessed row. + fn fold_zeros(acc: Ext, alpha: Ext, zeros: List‹SymExpr›, main: List‹Ext›) -> Ext { + match load(zeros) { + ListNode.Nil => acc, + ListNode.Cons(z, rest) => + fold_zeros(ood_fold(acc, alpha, eval_sym(z, main, store(ListNode.Nil))), alpha, rest, main), + } + } + + -- Fold the selector boolean checks (Function circuit): `assert_bool(row[s])` + -- = `assert_zero(s·(s-1))` for `s` in the selector range `[idx, idx+count)`. + fn fold_sel_bools(acc: Ext, alpha: Ext, main: List‹Ext›, idx: G, count: G) -> Ext { + match count { + 0 => acc, + _ => + let x = list_lookup(main, idx); + let bc = eg_mul(x, eg_sub(x, [gl_one(), gl_zero()])); + fold_sel_bools(ood_fold(acc, alpha, bc), alpha, main, idx + 1, count - 1), + } + } + + -- Fold the per-lookup constraints (`LookupAir::eval`): for lookup `k`, + -- `assert_one_ext(messageₖ · minvₖ)` = `assert_zero_ext(messageₖ·minvₖ - 1)`, + -- where `minvₖ = stage_2_row[1+k]` and + -- `messageₖ = lookup_challenge + fingerprint(fingerprint_challenge, argsₖ)`. + -- Simultaneously builds `acc_expr = stage_2_row[0] + Σ multiplicityₖ·minvₖ`. + -- Returns `(folder_acc, acc_expr)`. + fn fold_lookups(acc: Ext, alpha: Ext, lookups: List‹SysLookup›, k: G, + main: List‹Ext›, prep: List‹Ext›, s2row: List‹Ext›, + lch: Ext, fch: Ext, acc_expr: Ext) -> (Ext, Ext) { + match load(lookups) { + ListNode.Nil => (acc, acc_expr), + ListNode.Cons(lk, rest) => + let SysLookup.Mk(mult_e, args) = lk; + let minv = list_lookup(s2row, k + 1); + let mult = eval_sym(mult_e, main, prep); + let fp = fingerprint_ext(fch, args, main, prep); + let message = eg_add(lch, fp); + let c = eg_sub(eg_mul(message, minv), [gl_one(), gl_zero()]); + let acc = ood_fold(acc, alpha, c); + let acc_expr = eg_add(acc_expr, eg_mul(mult, minv)); + fold_lookups(acc, alpha, rest, k + 1, main, prep, s2row, lch, fch, acc_expr), + } + } + + -- The composition polynomial `composition(ζ)` for one circuit: replays the + -- inner-AIR constraints (per air kind) followed by the lookup-argument + -- constraints, folding each with `α` exactly as `LookupAir::eval` drives the + -- verifier folder. `accp`/`naccp` are the current/next lookup accumulators. + fn ood_composition(air: SysAir, lookups: List‹SysLookup›, + main: List‹Ext›, main_next: List‹Ext›, s2row: List‹Ext›, s2next: List‹Ext›, + prep: List‹Ext›, isf: Ext, isl: Ext, ist: Ext, + lch: Ext, fch: Ext, accp: Ext, naccp: Ext, alpha: Ext) -> Ext { + -- inner-AIR constraints first, then hand the accumulator to the lookup tail + -- (split so each `match air` arm ends in a tail call — Aiur forbids + -- non-tail matches). + match air { + SysAir.Function(c) => + let SysConstraints.Mk(zeros, ss, se, _w) = c; + let acc = fold_zeros([gl_zero(), gl_zero()], alpha, zeros, main); + let acc = fold_sel_bools(acc, alpha, main, ss, se - ss); + ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha), + SysAir.Memory(m) => + let SysMemory.Mk(_w) = m; + -- `Memory::eval`: is_real = col 1, ptr = col 2 (current and next row). + let is_real = list_lookup(main, 1); + let ptr = list_lookup(main, 2); + let is_real_next = list_lookup(main_next, 1); + let ptr_next = list_lookup(main_next, 2); + -- assert_bool(is_real) + let acc = ood_fold([gl_zero(), gl_zero()], alpha, eg_mul(is_real, eg_sub(is_real, [gl_one(), gl_zero()]))); + -- is_real_transition = is_real_next · is_transition + let irt = eg_mul(is_real_next, ist); + -- when(irt).assert_one(is_real) = irt·(is_real - 1) + let acc = ood_fold(acc, alpha, eg_mul(irt, eg_sub(is_real, [gl_one(), gl_zero()]))); + -- when(irt).assert_eq(ptr+1, ptr_next) = irt·(ptr + 1 - ptr_next) + let acc = ood_fold(acc, alpha, eg_mul(irt, eg_sub(eg_add(ptr, [gl_one(), gl_zero()]), ptr_next))); + ood_comp_tail(acc, lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha), + SysAir.Bytes1 => + ood_comp_tail([gl_zero(), gl_zero()], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha), + SysAir.Bytes2 => + ood_comp_tail([gl_zero(), gl_zero()], lookups, main, prep, s2row, s2next, isf, isl, ist, lch, fch, accp, naccp, alpha), + } + } + + -- Tail of `ood_composition`: fold the lookup-argument constraints onto the + -- inner-AIR accumulator, then the three accumulator boundary constraints. + fn ood_comp_tail(acc: Ext, lookups: List‹SysLookup›, main: List‹Ext›, prep: List‹Ext›, + s2row: List‹Ext›, s2next: List‹Ext›, isf: Ext, isl: Ext, ist: Ext, + lch: Ext, fch: Ext, accp: Ext, naccp: Ext, alpha: Ext) -> Ext { + let (acc, acc_expr) = + fold_lookups(acc, alpha, lookups, 0, main, prep, s2row, lch, fch, list_lookup(s2row, 0)); + let acc_col = list_lookup(s2row, 0); + let next_acc_col = list_lookup(s2next, 0); + -- when_first_row: acc_col = accp + let acc = ood_fold(acc, alpha, eg_mul(isf, eg_sub(acc_col, accp))); + -- when_transition: acc_expr = next_acc_col + let acc = ood_fold(acc, alpha, eg_mul(ist, eg_sub(acc_expr, next_acc_col))); + -- when_last_row: acc_expr = naccp + ood_fold(acc, alpha, eg_mul(isl, eg_sub(acc_expr, naccp))) + } + + -- ========================================================================== + -- Quotient evaluation from the opened quotient chunks. + -- + -- The trace domain is the order-2^L subgroup H (shift = 1). The quotient + -- domain is the disjoint coset `7·H'` of size `2^(L+log_qd)` (7 = Goldilocks + -- GENERATOR), split into `qd = 2^log_qd` chunk domains `Dⱼ` of size `2^L`, + -- shift `7·g_q^j` where `g_q = two_adic_gen(L + log_qd)`. Then + -- quotient(ζ) = Σⱼ zpsⱼ · from_ext_basis(chunkⱼ), + -- zpsⱼ = Πₖ≠ⱼ Z_{Dₖ}(ζ) / Z_{Dₖ}(first_point(Dⱼ)), + -- with `Z_{Dₖ}(x) = (x · shift_k⁻¹)^(2^L) - 1`. + -- ========================================================================== + + -- base-field power `base^e` (e small: the chunk index, < qd). `base` is a + -- non-native Goldilocks element; `e` is a native loop counter. + fn g_pow(base: [U8; 8], e: G) -> [U8; 8] { + match e { + 0 => gl_one(), + _ => gl_mul(base, g_pow(base, e - 1)), + } + } + + -- `Z_{Dⱼ}(x) = (x · shift_j⁻¹)^(2^L) - 1`, evaluated at extension point `x`. + fn vanish_chunk(x: Ext, l: G, shiftinv: [U8; 8]) -> Ext { + eg_sub(ext_exp_pow2(eg_mul(x, [shiftinv, gl_zero()]), l), [gl_one(), gl_zero()]) + } + + -- `zpsₜ = Πⱼ≠ₜ Z_{Dⱼ}(ζ) / Z_{Dⱼ}(shift_t)`. Iterates j over `[jidx, jidx+rem)`. + fn zps_prod(acc: Ext, zeta: Ext, l: G, g_q: [U8; 8], shift_t: [U8; 8], jidx: G, rem: G, t: G) -> Ext { + match rem { + 0 => acc, + _ => + let shiftinv = gl_inverse(gl_mul(gl_seven(), g_pow(g_q, jidx))); + -- skip the j = t factor (the chunk's own domain); branch in tail + -- position so the inner match is not a non-tail match. + match eq_zero(jidx - t) { + 1 => zps_prod(acc, zeta, l, g_q, shift_t, jidx + 1, rem - 1, t), + _ => + let factor = eg_mul(vanish_chunk(zeta, l, shiftinv), + eg_inverse(vanish_chunk([shift_t, gl_zero()], l, shiftinv))); + zps_prod(eg_mul(acc, factor), zeta, l, g_q, shift_t, jidx + 1, rem - 1, t), + }, + } + } + + -- `quotient(ζ) = Σₜ zpsₜ · from_ext_basis(chunkₜ)`, iterating the `qd` chunks + -- (`q_opened[idx][0] = [c0, c1]`). + fn quotient_sum(acc: Ext, zeta: Ext, l: G, qd: G, g_q: [U8; 8], + q_opened: OpenedRound, idx: G, rem: G, t: G) -> Ext { + match rem { + 0 => acc, + _ => + let shift_t = gl_mul(gl_seven(), g_pow(g_q, t)); + let zps_t = zps_prod([gl_one(), gl_zero()], zeta, l, g_q, shift_t, 0, qd, t); + let ch = list_lookup(q_opened, idx); + let row = list_lookup(ch, 0); + let qv = from_ext_basis(list_lookup(row, 0), list_lookup(row, 1)); + quotient_sum(eg_add(acc, eg_mul(zps_t, qv)), zeta, l, qd, g_q, + q_opened, idx + 1, rem - 1, t + 1), + } + } + + -- `quotient_degree = (max(md, 2) - 1).next_power_of_two()` and its log2. + -- Tabulated for `max_constraint_degree ≤ 17` (covers all current circuits); + -- larger degrees fall through to the `_` arm. + fn quotient_degree_of(md: G) -> G { + match md { + 0 => 1, 1 => 1, 2 => 1, + 3 => 2, + 4 => 4, 5 => 4, + 6 => 8, 7 => 8, 8 => 8, 9 => 8, + 10 => 16, 11 => 16, 12 => 16, 13 => 16, 14 => 16, 15 => 16, 16 => 16, 17 => 16, + _ => 32, + } + } + fn log_qd_of(md: G) -> G { + match md { + 0 => 0, 1 => 0, 2 => 0, + 3 => 1, + 4 => 2, 5 => 2, + 6 => 3, 7 => 3, 8 => 3, 9 => 3, + 10 => 4, 11 => 4, 12 => 4, 13 => 4, 14 => 4, 15 => 4, 16 => 4, 17 => 4, + _ => 5, + } + } + + -- The preprocessed opened row at ζ for circuit `i`, or `Nil` if the circuit + -- has no preprocessed trace (`preprocessed_indices[i] = None`). + fn ood_prep_row(prep_opt: PreprocessedOpt, oi: OptIdx) -> List‹Ext› { + match oi { + OptIdx.NoIdx => store(ListNode.Nil), + OptIdx.SomeIdx(j) => + match prep_opt { + PreprocessedOpt.NoPreprocessed => store(ListNode.Nil), + PreprocessedOpt.SomePreprocessed(round) => + let pr = list_lookup(round, j); + list_lookup(pr, 0), + }, + } + } + + -- Per-circuit OOD loop: for each circuit, recompute composition(ζ) and + -- quotient(ζ) and assert `composition · inv_vanishing == quotient`. Threads + -- the running lookup accumulator `accp` and the quotient-chunk offset `lastq`. + fn ood_loop(circuits: List‹SysCircuit›, prep_indices: List‹OptIdx›, + log_degrees: List‹U8›, accs: List‹Ext›, + stage1: OpenedRound, stage2: OpenedRound, prep_opt: PreprocessedOpt, + q_opened: OpenedRound, i: G, accp: Ext, lastq: G, + lch: Ext, fch: Ext, alpha: Ext, zeta: Ext) -> G { + match load(circuits) { + ListNode.Nil => 1, + ListNode.Cons(circ, rest) => + let SysCircuit.Mk(lair, _cc, md, _ph, _pw, _w1, _w2) = circ; + let SysLookupAir.Mk(air, lookups) = lair; + let l = to_field(list_lookup(log_degrees, i)); + let qd = quotient_degree_of(md); + let log_qd = log_qd_of(md); + let naccp = list_lookup(accs, i); + let s1 = list_lookup(stage1, i); + let main = list_lookup(s1, 0); + let main_next = list_lookup(s1, 1); + let s2 = list_lookup(stage2, i); + let s2row = reconstruct_ext_row(list_lookup(s2, 0)); + let s2next = reconstruct_ext_row(list_lookup(s2, 1)); + let prep = ood_prep_row(prep_opt, list_lookup(prep_indices, i)); + let (isf, isl, ist, invv) = trace_selectors(zeta, l); + let comp = ood_composition(air, lookups, main, main_next, s2row, s2next, + prep, isf, isl, ist, lch, fch, accp, naccp, alpha); + let g_q = two_adic_gen(l + log_qd); + let quot = quotient_sum([gl_zero(), gl_zero()], zeta, l, qd, g_q, q_opened, lastq, qd, 0); + assert_eq!(eg_eq(eg_mul(comp, invv), quot), 1); + ood_loop(rest, prep_indices, log_degrees, accs, stage1, stage2, prep_opt, + q_opened, i + 1, naccp, lastq + qd, lch, fch, alpha, zeta), + } + } + + -- The fingerprint of one claim's values: `Σ vᵢ · fch^i` (each `vᵢ` lifted from + -- its raw u64 limb to an extension element). Mirrors `lookup::fingerprint`. + fn fingerprint_vals(fch: Ext, vals: List‹U64›) -> Ext { + match load(vals) { + ListNode.Nil => [gl_zero(), gl_zero()], + ListNode.Cons(v, rest) => + eg_add([gl_reduce(v), gl_zero()], eg_mul(fch, fingerprint_vals(fch, rest))), + } + } + + -- The initial lookup accumulator built from the public claims: + -- `acc = Σ_claims 1 / (lookup_challenge + fingerprint(fingerprint_challenge, claim))` + -- (Rust `verify_multiple_claims`, lines 227-232). Empty claim list → zero. + fn claims_acc(acc: Ext, claims: List‹List‹U64››, lch: Ext, fch: Ext) -> Ext { + match load(claims) { + ListNode.Nil => acc, + ListNode.Cons(c, rest) => + let msg = eg_add(lch, fingerprint_vals(fch, c)); + claims_acc(eg_add(acc, eg_inverse(msg)), rest, lch, fch), + } + } + + -- Step 3 + 5: derive the challenges via the (prover-faithful) Fiat-Shamir + -- replay over the verifying key's preprocessed commitment + the proof + -- commitments + log_degrees + claims, seed the lookup accumulator from the + -- claims, then run the OOD composition/quotient check for every circuit. + -- Returns 1 on success (any mismatch aborts via `assert_eq!`). + fn ood_verify(sys: Sys, proof: Proof, claims: List‹List‹U64››, + num_queries: G, commit_pow_bits: G, log_blowup_pub: G) -> G { + -- The FRI parameters (`num_queries`, `commit_pow_bits`, `log_blowup`) are + -- public inputs. `log_blowup` also lives in the (digest-bound) verifying + -- key's CommitmentParameters — assert the two agree. + let Sys.Mk(params, circuits, commit, prep_indices) = sys; + let SysParams.Mk(log_blowup, _cap_height) = params; + assert_eq!(eq_zero(log_blowup - log_blowup_pub), 1); + match proof { + Proof.Mk(commitments, accs, log_degrees, opening, + q_opened, prep_opt, stage1, stage2) => + let Commitments.Mk(s1c, s2c, qc) = commitments; + let prep_cap = opt_commit_cap(commit); + let (lch, fch, alpha, zeta, post_zeta_input) = fiat_shamir(prep_cap, s1c, s2c, qc, log_degrees, claims); + let acc0 = claims_acc([gl_zero(), gl_zero()], claims, lch, fch); + -- Step 5: OOD composition/quotient identity for every circuit. + let _ood = ood_loop(circuits, prep_indices, log_degrees, accs, stage1, stage2, + prep_opt, q_opened, 0, acc0, 0, lch, fch, alpha, zeta); + -- Step 4: FRI PCS proximity + opening consistency, continuing the same + -- Fiat-Shamir transcript past ζ (observe opened values → α, βs, query). + pcs_fri_verify(post_zeta_input, stage1, stage2, q_opened, prep_opt, opening, + s1c, s2c, qc, prep_cap, circuits, prep_indices, log_degrees, zeta, + list_length(circuits), log_blowup, num_queries, commit_pow_bits), + } + } + + -- Read the public claims from the verifier's IO channel. Wire format (set by + -- the prover-side harness): u64 `num_claims`, then per claim a u64 `num_vals` + -- followed by `num_vals` raw `u64` `Val`s (8 LE bytes each, canonical < p). + fn read_claims(stream: ByteStream) -> (List‹List‹U64››, ByteStream) { + let (n, s) = read_count(stream); + read_claims_n(s, n) + } + fn read_claims_n(stream: ByteStream, n: G) -> (List‹List‹U64››, ByteStream) { + match n { + 0 => (store(ListNode.Nil), stream), + _ => + let (c, s) = read_one_claim(stream); + let (rest, s2) = read_claims_n(s, n - 1); + (store(ListNode.Cons(c, rest)), s2), + } + } + fn read_one_claim(stream: ByteStream) -> (List‹U64›, ByteStream) { + let (m, s) = read_count(stream); + read_u64_vec_n(s, m) + } +⟧ + +end MultiStark + +end diff --git a/RecursiveVerifier.lean b/RecursiveVerifier.lean new file mode 100644 index 00000000..b55d93ac --- /dev/null +++ b/RecursiveVerifier.lean @@ -0,0 +1,201 @@ +import Ix.Aiur.Meta +import Ix.Aiur.Protocol +import Ix.Aiur.Compiler +import Ix.Aiur.Statistics +import Ix.MultiStark +import Ix.Keccak +import Tests.Aiur.Common + +/-! +# Recursive verifier end-to-end test + +A standalone executable that exercises the Multi-STARK verifier scaffold +(`Ix/MultiStark.lean`) against a real proof: + +1. Define a tiny Aiur toplevel with a `factorial` entrypoint. +2. Prove `factorial(5) = 120` with the Multi-STARK backend. +3. Serialize that proof (`Proof.toBytes`, bincode) and seed it on IO + channel 0 (key `[0]`; vk on channel 1, claims on channel 2) — the hint the + recursive verifier reads non-deterministically via `#read_byte_stream`. +4. Run the `verify_multi_stark_proof` entrypoint over that IO buffer and + prove *its* execution, producing a recursive proof. + +Run with `lake exe recursive-verifier`. +-/ + +open Aiur + +private def toHex (b : ByteArray) : String := + b.data.foldl (fun s x => + let h := String.ofList (Nat.toDigits 16 x.toNat) + s ++ (if h.length == 1 then "0" ++ h else h)) "" + +/-- 8 little-endian bytes of a `Nat` (taken mod 2^64). -/ +private def u64le (n : Nat) : Array UInt8 := + (Array.range 8).map (fun i => UInt8.ofNat ((n >>> (8 * i)) % 256)) + +/-- Serialize the public claims for the verifier's IO channel, matching the +in-circuit `read_claims` wire format: u64 `num_claims`, then per claim a u64 +`num_vals` followed by the `Val`s as canonical 8-byte little-endian `u64`s. -/ +private def serializeClaims (claims : Array (Array Aiur.G)) : ByteArray := Id.run do + let mut out : Array UInt8 := u64le claims.size + for c in claims do + out := out ++ u64le c.size + for g in c do + out := out ++ u64le g.val.toNat + return ⟨out⟩ + +/-- A tiny Aiur program: `factorial` as the proving entrypoint. -/ +def factorialProgram : Source.Toplevel := ⟦ + pub fn factorial(n: G) -> G { + match n { + 0 => 1, + _ => n * factorial(n - 1), + } + } +⟧ + +/-- A tractable subset of the production FRI config that still exercises every +generalized verifier path: `numQueries := 2` drives the multi-query loop, +`commitProofOfWorkBits := 8` drives the commit-phase proof-of-work grinding +check (one per FRI round), and `log_blowup` is read from the verifying key (the +verifier code is blowup-value-agnostic). The full production parameters +(`logBlowup := 2`, `numQueries := 100`, `commitProofOfWorkBits := 20`) use the +same code paths but make *proving* (large LDE + 2²⁰ grinding per round) too slow +for this end-to-end harness; the verifier itself handles them unchanged. -/ +def recCommitParams : Aiur.CommitmentParameters := + { logBlowup := 2, capHeight := 0 } +def innerFri : FriParameters := + { logFinalPolyLen := 0, maxLogArity := 1, numQueries := 3, + commitProofOfWorkBits := 20, queryProofOfWorkBits := 0 } + +/-- Compile a toplevel and build its proving system, or fail with a message. -/ +def buildSystem (label : String) (top : Source.Toplevel) : + IO (CompiledToplevel × AiurSystem) := do + let compiled ← match top.compile with + | .error e => throw <| IO.userError s!"{label}: compilation failed: {e}" + | .ok c => pure c + pure (compiled, AiurSystem.build compiled.bytecode recCommitParams) + +def main : IO UInt32 := do + -- ── 1. factorial system ────────────────────────────────────────────────── + let (facCompiled, facSystem) ← buildSystem "factorial" factorialProgram + let facIdx ← match facCompiled.getFuncIdx `factorial with + | some i => pure i + | none => IO.eprintln "factorial entrypoint not found"; return 1 + + -- ── 2. prove factorial(5) = 120 ────────────────────────────────────────── + -- `G` is a reserved token (the Aiur DSL), so spell the field type qualified. + let input := #[Aiur.G.ofNat 5] + let (claim, proof, _) := facSystem.prove innerFri facIdx input default + let expected := buildClaim facIdx input #[Aiur.G.ofNat 120] + if claim != expected then + IO.eprintln s!"unexpected factorial claim:\n got {claim}\n want {expected}" + return 1 + IO.println s!"✓ proved factorial(5) = 120 (claim = {claim})" + match facSystem.verify innerFri claim (.ofBytes proof.toBytes) with + | .error e => IO.eprintln s!"inner proof failed to verify: {e}"; return 1 + | .ok _ => IO.println "✓ inner proof verifies" + + -- ── 3. serialize the proof — pure non-deterministic advice (NOT hashed, not + -- a public input; the verifier's own checks are what bind it) ───────── + let proofBytes := proof.toBytes + IO.println s!" serialized proof: {proofBytes.size} bytes (advice, not digest-bound)" + let proofGs : Array Aiur.G := proofBytes.data.map .ofUInt8 + + -- Verifying key (`System`) bytes + its keccak-256 digest. + let vkBytes := facSystem.vkBytes + IO.println s!" verifying key: {vkBytes.size} bytes (~{(vkBytes.size + 1) / 136 + 1} keccak-f blocks)" + let sysDigest := Keccak.hash vkBytes + IO.println s!" keccak256(vk) = {toHex sysDigest}" + let sysDigestInput : Array Aiur.G := sysDigest.data.map .ofUInt8 + let vkGs : Array Aiur.G := vkBytes.data.map .ofUInt8 + + -- Public claims (`&[&[Val]]` = the single factorial claim) + keccak digest. + let claimBytes := serializeClaims #[claim] + IO.println s!" claims: {claimBytes.size} bytes" + let claimsDigest := Keccak.hash claimBytes + IO.println s!" keccak256(claims)= {toHex claimsDigest}" + let claimsDigestInput : Array Aiur.G := claimsDigest.data.map .ofUInt8 + let claimGs : Array Aiur.G := claimBytes.data.map .ofUInt8 + + -- Public input = vk digest ++ claims digest ++ FRI params (num_queries, + -- commit_pow_bits, log_blowup); IO hints: proof at [0], vk at [1], claims at [2]. + let friParamInput : Array Aiur.G := + #[Aiur.G.ofNat innerFri.numQueries, Aiur.G.ofNat innerFri.commitProofOfWorkBits, + Aiur.G.ofNat recCommitParams.logBlowup] + let input : Array Aiur.G := sysDigestInput ++ claimsDigestInput ++ friParamInput + let verifierIO : IOBuffer := + (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] proofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend 2 #[Aiur.G.ofNat 0] claimGs + + -- ── 4. recursive verifier system ───────────────────────────────────────── + -- The PRODUCTION toplevel (no test circuits — every `pub fn` adds width). + let vTop ← match MultiStark.multiStark with + | .error e => IO.eprintln s!"verifier toplevel merge failed: {e}"; return 1 + | .ok t => pure t + let (vCompiled, _vSystem) ← buildSystem "verifier" vTop + -- The TESTS toplevel: the verifier plus the `*_test` entrypoints. Compiled + -- separately so the test circuits never widen the production system; only the + -- bytecode is needed (tests are executed, never proven). + let tTop ← match MultiStark.multiStarkTests with + | .error e => IO.eprintln s!"tests toplevel merge failed: {e}"; return 1 + | .ok t => pure t + let tCompiled ← match tTop.compile with + | .error e => throw <| IO.userError s!"verifier-tests: compilation failed: {e}" + | .ok c => pure c + let runTest (name : Lean.Name) (okMsg : String) : IO Bool := do + match tCompiled.bytecode.execute (tCompiled.getFuncIdx name).get! #[] default with + | .error e => IO.eprintln s!"✗ {name} FAILED — {e}"; pure false + | .ok _ => IO.println s!"✓ {okMsg}"; pure true + -- Validate the verifier's primitives against the Rust reference values. + if !(← runTest `pcs_hash_test "pcs_hash_test: keccak MMCS sponge/compress match reference") then return 1 + if !(← runTest `pcs_merkle_test "pcs_merkle_test: Merkle verify_batch matches reference (root + tamper)") then return 1 + if !(← runTest `sample_bits_test "sample_bits_test: challenger sample_bits matches reference") then return 1 + if !(← runTest `pcs_challenger4_test "pcs_challenger4_test: PCS challenger continuation (α_pcs/α_fri/β/index) matches reference") then return 1 + if !(← runTest `fri_fold_test "fri_fold_test: FRI arity-2 fold_row matches reference") then return 1 + if !(← runTest `ro_fold_test "ro_fold_test: open_input reduced-opening math matches reference") then return 1 + if !(← runTest `gl_addsub_test "gl_addsub_test: non-native Goldilocks add/sub match reference") then return 1 + if !(← runTest `gl_muldiv_test "gl_muldiv_test: non-native Goldilocks mul/inverse/div match reference") then return 1 + if !(← runTest `eg_ops_test "eg_ops_test: non-native ExtGoldilocks add/mul/inverse/div match reference") then return 1 + let vIdx ← match vCompiled.getFuncIdx `verify_multi_stark_proof with + | some i => pure i + | none => IO.eprintln "verify_multi_stark_proof entrypoint not found"; return 1 + + -- ── 5. run the verifier: deserialize proof (advice) + vk, replay + -- Fiat-Shamir, run the OOD + FRI checks ───────────────────────────── + IO.println "running verifier (advice proof + vk digest binding + OOD + FRI)…" + match vCompiled.bytecode.execute vIdx input verifierIO with + | .error e => IO.eprintln s!"✗ verifier rejected: {e}"; return 1 + | .ok (_, _, queryCounts) => + IO.println "✓ verifier accepted: vk digest bound, OOD + FRI checks pass" + -- ── 6. negative test: tampered proof ADVICE must be rejected by the + -- verification checks themselves (no digest binding any more) — + -- this isolates the Fiat-Shamir/Merkle/OOD/FRI rejection path. + -- Byte 0 is the first stage_1-commitment limb: it diverges the + -- replayed transcript from the one the proof was generated under. + let badProofGs := proofGs.set! 0 (Aiur.G.ofNat ((proofBytes.data[0]!.toNat + 1) % 256)) + let badProofIO : IOBuffer := + (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] badProofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend 2 #[Aiur.G.ofNat 0] claimGs + match vCompiled.bytecode.execute vIdx input badProofIO with + | .error _ => IO.println "✓ tampered proof advice correctly rejected (verification checks)" + | .ok _ => IO.eprintln "✗ tampered proof advice was NOT rejected"; return 1 + -- ── 6b. negative test: a tampered CLAIM (with a matching keccak digest) + -- must be rejected by the OOD / accumulator check. Changing the + -- claim feeds a different value into Fiat-Shamir (→ different ζ) and + -- into the lookup accumulator, so the composition/quotient identity + -- no longer holds even though the keccak binding passes. ────────── + let badClaim : Array Aiur.G := claim.set! (claim.size - 1) (Aiur.G.ofNat 121) + let badClaimBytes := serializeClaims #[badClaim] + let badClaimsDigest := Keccak.hash badClaimBytes + let badClaimInput : Array Aiur.G := + sysDigestInput ++ (badClaimsDigest.data.map .ofUInt8) ++ friParamInput + let badClaimIO : IOBuffer := + (((default : IOBuffer).extend 0 #[Aiur.G.ofNat 0] proofGs).extend 1 #[Aiur.G.ofNat 0] vkGs).extend + 2 #[Aiur.G.ofNat 0] (badClaimBytes.data.map .ofUInt8) + match vCompiled.bytecode.execute vIdx badClaimInput badClaimIO with + | .error _ => IO.println "✓ tampered claim correctly rejected (OOD/accumulator mismatch)" + | .ok _ => IO.eprintln "✗ tampered claim was NOT rejected"; return 1 + -- ── 7. circuit statistics ─────────────────────────────────────────────── + IO.println "\n── verifier circuit statistics ──" + Aiur.printStats (Aiur.computeStats vCompiled queryCounts) + pure 0 diff --git a/lakefile.lean b/lakefile.lean index 5362daa8..853c8191 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -98,6 +98,10 @@ lean_exe «arena-exclude» where root := `Tests.Ix.Kernel.ArenaExclude supportInterpreter := true +lean_exe «recursive-verifier» where + root := `RecursiveVerifier + supportInterpreter := true + end Tests section Benchmarks diff --git a/src/aiur.rs b/src/aiur.rs index d54325cc..a218d9b8 100644 --- a/src/aiur.rs +++ b/src/aiur.rs @@ -5,6 +5,7 @@ pub mod gadgets; pub mod memory; pub mod synthesis; pub mod trace; +pub mod vk_codec; use multi_stark::p3_field::PrimeCharacteristicRing; diff --git a/src/aiur/constraints.rs b/src/aiur/constraints.rs index abdbef2b..4ad382b0 100644 --- a/src/aiur/constraints.rs +++ b/src/aiur/constraints.rs @@ -33,6 +33,7 @@ type Degree = u8; static INV_256: LazyLock = LazyLock::new(|| G::from_u64(256).inverse()); /// Holds data for a function circuit. +#[derive(serde::Serialize, serde::Deserialize)] pub struct Constraints { pub zeros: Vec, pub selectors: Range, diff --git a/src/aiur/memory.rs b/src/aiur/memory.rs index fa676394..d6069942 100644 --- a/src/aiur/memory.rs +++ b/src/aiur/memory.rs @@ -15,8 +15,9 @@ use rayon::{ use crate::aiur::{G, execute::QueryRecord, memory_channel}; +#[derive(serde::Serialize, serde::Deserialize)] pub struct Memory { - width: usize, + pub(crate) width: usize, } impl Memory { diff --git a/src/aiur/synthesis.rs b/src/aiur/synthesis.rs index 1dd7f971..bc861635 100644 --- a/src/aiur/synthesis.rs +++ b/src/aiur/synthesis.rs @@ -26,10 +26,11 @@ pub struct AiurSystem { toplevel: Toplevel, // perhaps remove the key from the system in verifier only mode? key: ProverKey, - system: System, + pub(crate) system: System, } -enum AiurCircuit { +#[derive(serde::Serialize, serde::Deserialize)] +pub(crate) enum AiurCircuit { Function(Constraints), Memory(Memory), Bytes1, diff --git a/src/aiur/vk_codec.rs b/src/aiur/vk_codec.rs new file mode 100644 index 00000000..373ca6e7 --- /dev/null +++ b/src/aiur/vk_codec.rs @@ -0,0 +1,314 @@ +//! (De)serialization of the verifier's key: `System`. +//! +//! The verifier needs the system's per-circuit AIR (symbolic constraints + +//! lookups + widths), the shared preprocessed commitment, the preprocessed +//! index map, and the commitment parameters — i.e. everything in +//! [`System`] except the prover-only preprocessed traces (the +//! large gadget tables in each `LookupAir.preprocessed`, which are +//! `#[serde(skip)]`-ed and reconstructed/committed separately). +//! +//! This uses serde (`#[derive(Serialize, Deserialize)]` on `System`, `Circuit`, +//! `LookupAir`, `Lookup`, `SymbolicExpression`, `SymbolicVariable`, `Entry`, +//! `CommitmentParameters`, and the Ix `AiurCircuit`/`Constraints`/`Memory`) +//! with the **same bincode config the proof uses** +//! (`standard().with_little_endian().with_fixed_int_encoding()`), so the wire +//! format matches `Proof`'s and can be re-implemented in Aiur the same way. + +// The codec is exercised by tests today and wired to the FFI / Aiur port next. +#![allow(dead_code)] + +use bincode::{ + config::{Configuration, Fixint, LittleEndian, standard}, + serde::{decode_from_slice, encode_to_vec}, +}; +use multi_stark::system::System; + +use crate::aiur::synthesis::{AiurCircuit, AiurSystem}; + +/// The bincode configuration shared with `multi_stark::prover::Proof`. +fn serde_config() -> Configuration { + standard().with_little_endian().with_fixed_int_encoding() +} + +/// Serialize the verifying key `System` (preprocessed traces are +/// skipped — see the module docs). +pub(crate) fn to_bytes( + system: &System, +) -> Result, String> { + encode_to_vec(system, serde_config()).map_err(|e| e.to_string()) +} + +/// Deserialize a `System` from [`to_bytes`] output, requiring that +/// every byte is consumed. +pub(crate) fn from_bytes(bytes: &[u8]) -> Result, String> { + let (system, consumed) = + decode_from_slice(bytes, serde_config()).map_err(|e| e.to_string())?; + if consumed != bytes.len() { + return Err(format!( + "trailing data: consumed {consumed} of {}", + bytes.len() + )); + } + Ok(system) +} + +/// Convenience: serialize the verifying key of a built [`AiurSystem`]. +pub(crate) fn aiur_system_to_bytes( + sys: &AiurSystem, +) -> Result, String> { + to_bytes(&sys.system) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::aiur::gadgets::{AiurGadget, bytes1::Bytes1, bytes2::Bytes2}; + use multi_stark::{lookup::LookupAir, types::CommitmentParameters}; + + /// Build a small real `System` from the two byte gadgets and + /// check the verifying-key codec round-trips (re-encoding is byte-identical). + #[test] + fn system_vk_round_trips() { + let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 }; + let (system, _key) = System::new( + cp, + [ + LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups()), + LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups()), + ], + ); + let bytes = to_bytes(&system).expect("encode"); + let back = from_bytes(&bytes).expect("decode"); + let reencoded = to_bytes(&back).expect("re-encode"); + assert_eq!(bytes, reencoded, "verifying-key codec round-trip mismatch"); + } + + #[test] + fn rejects_trailing_bytes() { + let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 }; + let (system, _key) = + System::new(cp, [LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups())]); + let mut bytes = to_bytes(&system).expect("encode"); + bytes.push(0); + assert!(from_bytes(&bytes).is_err(), "should reject trailing data"); + } +} + +// ════════════════════════════════════════════════════════════════════════════ +// Manual deserializer — a hand-written reader matching the exact bincode bytes +// `to_bytes` produces, decoding straight into the real `System`. +// This is the reference re-implemented in Aiur (the verifier reads its key from +// this stream); `manual_matches_serde` proves it agrees with serde. +// +// bincode `standard().little_endian().fixed_int` layout: +// enum tag : u32 (4 bytes LE) Option : 1 byte (0 = None / 1 = Some) +// usize / u64 / G : 8 bytes LE Vec : u64 length, then elements +// struct : fields in declaration order Range : start, end (u64) +// MerkleCap : Vec<[u64; 4]> PhantomData : 0 bytes +// ════════════════════════════════════════════════════════════════════════════ + +use multi_stark::{ + builder::symbolic::{Entry, SymbolicExpression, SymbolicVariable}, + lookup::{Lookup, LookupAir}, + p3_field::PrimeCharacteristicRing, + system::Circuit, + types::{Commitment, CommitmentParameters, Val}, +}; + +use crate::aiur::{constraints::Constraints, memory::Memory}; + +type Expr = SymbolicExpression; + +struct R<'a> { + buf: &'a [u8], + pos: usize, +} + +impl<'a> R<'a> { + fn take(&mut self, n: usize) -> Result<&'a [u8], String> { + let end = self.pos.checked_add(n).ok_or("length overflow")?; + if end > self.buf.len() { + return Err(format!("eof: need {n} at offset {}", self.pos)); + } + let s = &self.buf[self.pos..end]; + self.pos = end; + Ok(s) + } + fn u8(&mut self) -> Result { + Ok(self.take(1)?[0]) + } + fn u32(&mut self) -> Result { + Ok(u32::from_le_bytes(self.take(4)?.try_into().unwrap())) + } + fn u64(&mut self) -> Result { + Ok(u64::from_le_bytes(self.take(8)?.try_into().unwrap())) + } + fn usize(&mut self) -> Result { + usize::try_from(self.u64()?).map_err(|e| format!("usize overflow: {e}")) + } + fn g(&mut self) -> Result { + Ok(Val::from_u64(self.u64()?)) + } + fn entry(&mut self) -> Result { + Ok(match self.u32()? { + 0 => Entry::Preprocessed { offset: self.usize()? }, + 1 => Entry::Main { offset: self.usize()? }, + 2 => Entry::Stage2 { offset: self.usize()? }, + 3 => Entry::Public, + 4 => Entry::Stage2Public, + 5 => Entry::Challenge, + t => return Err(format!("bad Entry tag {t}")), + }) + } + fn expr(&mut self) -> Result { + Ok(match self.u32()? { + 0 => SymbolicExpression::Variable(SymbolicVariable::new( + self.entry()?, + self.usize()?, + )), + 1 => SymbolicExpression::IsFirstRow, + 2 => SymbolicExpression::IsLastRow, + 3 => SymbolicExpression::IsTransition, + 4 => SymbolicExpression::Constant(self.g()?), + 5 => { + let x = Box::new(self.expr()?); + let y = Box::new(self.expr()?); + SymbolicExpression::Add { x, y, degree_multiple: self.usize()? } + }, + 6 => { + let x = Box::new(self.expr()?); + let y = Box::new(self.expr()?); + SymbolicExpression::Sub { x, y, degree_multiple: self.usize()? } + }, + 7 => { + let x = Box::new(self.expr()?); + SymbolicExpression::Neg { x, degree_multiple: self.usize()? } + }, + 8 => { + let x = Box::new(self.expr()?); + let y = Box::new(self.expr()?); + SymbolicExpression::Mul { x, y, degree_multiple: self.usize()? } + }, + t => return Err(format!("bad SymbolicExpression tag {t}")), + }) + } + fn vec( + &mut self, + mut f: impl FnMut(&mut Self) -> Result, + ) -> Result, String> { + let n = self.usize()?; + let mut v = Vec::with_capacity(n.min(1 << 16)); + for _ in 0..n { + v.push(f(self)?); + } + Ok(v) + } + fn lookup(&mut self) -> Result, String> { + Ok(Lookup { multiplicity: self.expr()?, args: self.vec(Self::expr)? }) + } + fn aircircuit(&mut self) -> Result { + Ok(match self.u32()? { + 0 => AiurCircuit::Function(Constraints { + zeros: self.vec(Self::expr)?, + selectors: self.usize()?..self.usize()?, + width: self.usize()?, + }), + 1 => AiurCircuit::Memory(Memory { width: self.usize()? }), + 2 => AiurCircuit::Bytes1, + 3 => AiurCircuit::Bytes2, + t => return Err(format!("bad AiurCircuit tag {t}")), + }) + } + fn circuit(&mut self) -> Result, String> { + // LookupAir: inner_air, lookups (preprocessed is `#[serde(skip)]` -> None). + let air = LookupAir { + inner_air: self.aircircuit()?, + lookups: self.vec(Self::lookup)?, + preprocessed: None, + }; + Ok(Circuit { + air, + constraint_count: self.usize()?, + max_constraint_degree: self.usize()?, + preprocessed_height: self.usize()?, + preprocessed_width: self.usize()?, + stage_1_width: self.usize()?, + stage_2_width: self.usize()?, + }) + } + fn commitment(&mut self) -> Result { + let caps = self.vec(|r| Ok([r.u64()?, r.u64()?, r.u64()?, r.u64()?]))?; + Ok(Commitment::from(caps)) + } + fn option( + &mut self, + f: impl FnOnce(&mut Self) -> Result, + ) -> Result, String> { + match self.u8()? { + 0 => Ok(None), + 1 => Ok(Some(f(self)?)), + t => Err(format!("bad Option tag {t}")), + } + } +} + +/// Hand-written deserializer of the verifying-key `System`, +/// matching serde/bincode byte-for-byte (the Aiur port mirrors this). +pub(crate) fn manual_deserialize( + bytes: &[u8], +) -> Result, String> { + let mut r = R { buf: bytes, pos: 0 }; + let commitment_parameters = + CommitmentParameters { log_blowup: r.usize()?, cap_height: r.usize()? }; + let circuits = r.vec(R::circuit)?; + let preprocessed_commit = r.option(R::commitment)?; + let preprocessed_indices = r.vec(|r| r.option(R::usize))?; + if r.pos != bytes.len() { + return Err(format!( + "trailing data: consumed {} of {}", + r.pos, + bytes.len() + )); + } + Ok(System { + commitment_parameters, + circuits, + preprocessed_commit, + preprocessed_indices, + }) +} + +#[cfg(test)] +mod manual_tests { + use super::*; + use crate::aiur::gadgets::{AiurGadget, bytes1::Bytes1, bytes2::Bytes2}; + use multi_stark::{lookup::LookupAir, types::CommitmentParameters}; + + /// The manual deserializer agrees with serde: decoding serde's bytes and + /// re-encoding with serde reproduces them byte-for-byte. + #[test] + fn manual_matches_serde() { + let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 }; + let (system, _key) = System::new( + cp, + [ + LookupAir::new(AiurCircuit::Bytes1, Bytes1.lookups()), + LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups()), + ], + ); + let bytes = to_bytes(&system).expect("serde encode"); + let manual = manual_deserialize(&bytes).expect("manual decode"); + let reencoded = to_bytes(&manual).expect("serde re-encode"); + assert_eq!(bytes, reencoded, "manual deserializer disagrees with serde"); + } + + #[test] + fn manual_rejects_trailing() { + let cp = CommitmentParameters { log_blowup: 1, cap_height: 0 }; + let (system, _key) = + System::new(cp, [LookupAir::new(AiurCircuit::Bytes2, Bytes2.lookups())]); + let mut bytes = to_bytes(&system).expect("encode"); + bytes.push(0); + assert!(manual_deserialize(&bytes).is_err()); + } +} diff --git a/src/ffi/aiur/protocol.rs b/src/ffi/aiur/protocol.rs index ed2ec7b4..a26d7391 100644 --- a/src/ffi/aiur/protocol.rs +++ b/src/ffi/aiur/protocol.rs @@ -57,6 +57,19 @@ extern "C" fn rs_aiur_proof_of_bytes( LeanExternal::alloc(&AIUR_PROOF_CLASS, proof) } +/// `Aiur.AiurSystem.vkBytes : @& AiurSystem → ByteArray` +/// +/// Serializes the verifying key (`System`) — see +/// `crate::aiur::vk_codec`. +#[unsafe(no_mangle)] +extern "C" fn rs_aiur_system_vk_bytes( + system: LeanExternal>, +) -> LeanByteArray { + let bytes = crate::aiur::vk_codec::aiur_system_to_bytes(system.get()) + .expect("VK serialization error"); + LeanByteArray::from_bytes(&bytes) +} + /// `AiurSystem.build : @&Bytecode.Toplevel → @&CommitmentParameters → AiurSystem` #[unsafe(no_mangle)] extern "C" fn rs_aiur_system_build(