From c22ce1b1b3d3767349122fa663ae0a1929d564e0 Mon Sep 17 00:00:00 2001 From: sphere <101384151+spherel@users.noreply.github.com> Date: Thu, 4 Jun 2026 07:13:13 -0700 Subject: [PATCH] fix(soundness): range-check HintsTable init limbs (#999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `HintsTable` is the only RAM table with prover-witnessed (non-zero) init values. `DynVolatileRamTableInitConfig::construct_circuit` created those init-value limbs as raw `WitIn`s with no range check, so a malicious prover could supply non-canonical limbs (>= 2^16). The load path reads memory via `UInt::new_unchecked` (e.g. `LW` in load_v2.rs) and forwards the limbs to the destination register unconstrained, so a non-canonical hint word would propagate into the computation — an under-constraint soundness bug. Fix: bind every non-zero-init limb to `LIMB_BITS` (u16) in `construct_circuit`, making the reconstructed word a canonical u32. Wiring notes: - A range check is a "looking" lookup (`lk_expressions`). The default `TableCircuit::build_gkr_iop_circuit` only wires table records, so `DynVolatileRamCircuit` now overrides it to size the r/w/lk groups with looking + table lengths. For zero-init tables (heap, stack) the looking lengths are 0, so the wiring is byte-for-byte identical to the default. - The lookup multiplicity must land in `combined_lk_mlt`, so the hint init circuit is assigned via a new `ZKVMWitnesses::assign_table_circuit_with_lk` (mirrors `assign_shared_circuit`) and moved before `finalize_lk_multiplicities` in `generate_witness` (main + GPU-debug paths), matching the existing ShardRam pre-finalize ordering. Validated: `cargo check`, `cargo clippy -D warnings`, the `ram_impl` unit tests — including a new soundness regression `test_hint_init_rejects_non_canonical_limb` that forces each init limb to `2^LIMB_BITS` and asserts MockProver rejects the witness via the `init_v_limb_{i}_in_u16` range lookup (the test fails if the range check is removed) — and real (non-mock) prove+verify of fibonacci with hints, single shard and 61-shard. The proof balances the global logup including the hint circuit's range-check records, confirming the looking lookups are enforced (a dropped lookup would unbalance the recorded multiplicity). Co-Authored-By: Claude Opus 4.8 --- ceno_zkvm/src/e2e.rs | 42 +++-- .../src/instructions/riscv/rv32im/mmu.rs | 8 +- ceno_zkvm/src/structs.rs | 49 +++++ ceno_zkvm/src/tables/ram/ram_circuit.rs | 80 +++++++++ ceno_zkvm/src/tables/ram/ram_impl.rs | 168 +++++++++++++++++- 5 files changed, 329 insertions(+), 18 deletions(-) diff --git a/ceno_zkvm/src/e2e.rs b/ceno_zkvm/src/e2e.rs index 42edd188c..6ce2a2424 100644 --- a/ceno_zkvm/src/e2e.rs +++ b/ceno_zkvm/src/e2e.rs @@ -1521,6 +1521,22 @@ pub fn generate_witness<'a, E: ExtensionField>( ) }).unwrap(); + // Assign the dynamic-init tables (heap + hints) before + // `finalize_lk_multiplicities`: `HintsInitCircuit` range-checks its + // prover-witnessed init limbs (#999) and folds the per-limb u16 + // lookups into `combined_lk_mlt` via `assign_table_circuit_with_lk`. + info_span!("assign_dynamic_init_table").in_scope(|| { + system_config + .mmu_config + .assign_dynamic_init_table_circuit( + &system_config.zkvm_cs, + &mut zkvm_witness, + &pi, + &emul_result.final_mem_state.hints, + &emul_result.final_mem_state.heap, + ) + }).unwrap(); + info_span!("finalize_lk_multiplicities").in_scope(|| { zkvm_witness.finalize_lk_multiplicities(); }); @@ -1561,7 +1577,9 @@ pub fn generate_witness<'a, E: ExtensionField>( .unwrap(); // Mirror the main path so `combined_lk_mlt` comparison stays // meaningful: continuation pushes ShardRamCircuit's per-row - // y6_lo lookups into `lk_mlts` before finalize. + // y6_lo lookups into `lk_mlts` before finalize, and the dynamic + // init tables push HintsInitCircuit's per-limb u16 range-check + // lookups (#999) before finalize. system_config .mmu_config .assign_continuation_circuit( @@ -1576,6 +1594,16 @@ pub fn generate_witness<'a, E: ExtensionField>( &emul_result.final_mem_state.heap, ) .unwrap(); + system_config + .mmu_config + .assign_dynamic_init_table_circuit( + &system_config.zkvm_cs, + &mut cpu_witness, + &pi, + &emul_result.final_mem_state.hints, + &emul_result.final_mem_state.heap, + ) + .unwrap(); cpu_witness.finalize_lk_multiplicities(); #[cfg(feature = "gpu")] @@ -1655,18 +1683,6 @@ pub fn generate_witness<'a, E: ExtensionField>( } }).unwrap(); - info_span!("assign_dynamic_init_table").in_scope(|| { - system_config - .mmu_config - .assign_dynamic_init_table_circuit( - &system_config.zkvm_cs, - &mut zkvm_witness, - &pi, - &emul_result.final_mem_state.hints, - &emul_result.final_mem_state.heap, - ) - }).unwrap(); - info_span!("assign_program_table").in_scope(|| { zkvm_witness .assign_table_circuit::>( diff --git a/ceno_zkvm/src/instructions/riscv/rv32im/mmu.rs b/ceno_zkvm/src/instructions/riscv/rv32im/mmu.rs index 4d1d038fd..ca782c1f7 100644 --- a/ceno_zkvm/src/instructions/riscv/rv32im/mmu.rs +++ b/ceno_zkvm/src/instructions/riscv/rv32im/mmu.rs @@ -88,6 +88,12 @@ impl MmuConfig { // fixed.register_table_circuit::>(cs, &self.ram_bus_circuit, &()); } + /// Assigns the dynamic-init RAM tables (heap + hints). Must run *before* + /// `ZKVMWitnesses::finalize_lk_multiplicities`: `HintsInitCircuit` is + /// non-zero-init and range-checks its prover-witnessed init limbs (#999), + /// contributing per-limb u16 lookups that must land in `combined_lk_mlt` + /// via `assign_table_circuit_with_lk`. `HeapInitCircuit` is zero-init and + /// emits no lookups, so the plain `assign_table_circuit` path is fine. pub fn assign_dynamic_init_table_circuit( &self, cs: &ZKVMConstraintSystem, @@ -101,7 +107,7 @@ impl MmuConfig { &self.heap_init_config, &(heap_final, pv, pv.heap_shard_len as usize), )?; - witness.assign_table_circuit::>( + witness.assign_table_circuit_with_lk::>( cs, &self.hints_init_config, &(hints_final, pv, pv.hint_shard_len as usize), diff --git a/ceno_zkvm/src/structs.rs b/ceno_zkvm/src/structs.rs index b83efffce..c0a757016 100644 --- a/ceno_zkvm/src/structs.rs +++ b/ceno_zkvm/src/structs.rs @@ -502,6 +502,55 @@ impl ZKVMWitnesses { Ok(()) } + /// Like [`Self::assign_table_circuit`], but for a table circuit that itself + /// *performs* lookups (e.g. a non-zero-init RAM table that range-checks its + /// prover-witnessed init limbs, #999). The circuit's lookup multiplicity is + /// folded into `lk_mlts` so that `finalize_lk_multiplicities` includes it in + /// `combined_lk_mlt` and the range-table `mlt` columns balance the logup. + /// + /// MUST run *before* [`Self::finalize_lk_multiplicities`] (same ordering + /// requirement as [`Self::assign_shared_circuit`]). + pub fn assign_table_circuit_with_lk>( + &mut self, + cs: &ZKVMConstraintSystem, + config: &TC::TableConfig, + input: &TC::WitnessInput<'_>, + ) -> Result<(), ZKVMError> { + assert!( + self.combined_lk_mlt.is_none(), + "assign_table_circuit_with_lk must run before finalize_lk_multiplicities" + ); + let cs = cs.get_cs(&TC::name()).unwrap(); + let mut lk_multiplicity = LkMultiplicity::default(); + let witness = TC::assign_instances_with_lk_multiplicities( + config, + cs.zkvm_v1_css.num_witin as usize, + cs.zkvm_v1_css.num_structural_witin as usize, + &mut lk_multiplicity, + input, + )?; + let witness_instances = witness[0].num_instances(); + let structural_instances = witness[1].num_instances(); + if witness_instances > 0 && structural_instances > 0 { + assert_eq!( + witness_instances, + structural_instances, + "{}: mismatched num_instances between witness and structural RMMs", + TC::name() + ); + } + let num_instances = std::cmp::max(witness_instances, structural_instances); + let input = ChipInput::new(TC::name(), witness, [num_instances, 0]); + assert!(self.witnesses.insert(TC::name(), vec![input]).is_none()); + assert!( + self.lk_mlts + .insert(TC::name(), lk_multiplicity.into_finalize_result()) + .is_none() + ); + + Ok(()) + } + #[allow(clippy::type_complexity)] pub fn assign_shared_circuit( &mut self, diff --git a/ceno_zkvm/src/tables/ram/ram_circuit.rs b/ceno_zkvm/src/tables/ram/ram_circuit.rs index a07448aa3..f0f25b518 100644 --- a/ceno_zkvm/src/tables/ram/ram_circuit.rs +++ b/ceno_zkvm/src/tables/ram/ram_circuit.rs @@ -6,6 +6,7 @@ use crate::{ scheme::PublicValues, structs::{ProgramParams, RAMType}, tables::{RMMCollections, TableCircuit}, + witness::LkMultiplicity, }; use ceno_emul::{Addr, Cycle, GetAddr, WORD_SIZE, Word}; use ff_ext::{ExtensionField, SmallField}; @@ -199,6 +200,7 @@ pub trait DynVolatileRamTableConfigTrait: Sized + Send + Sync { num_witin: usize, num_structural_witin: usize, data: &Self::WitnessInput<'_>, + lk_multiplicity: Option<&LkMultiplicity>, ) -> Result<[RowMajorMatrix; 2], CircuitBuilderError>; } @@ -233,6 +235,61 @@ impl< Ok(cb.namespace(|| Self::name(), |cb| C::construct_circuit(cb, params))?) } + /// Generalizes the default [`TableCircuit::build_gkr_iop_circuit`] to also + /// wire "looking" lookups (`lk_expressions`). Non-zero-init tables range-check + /// their prover-witnessed init limbs (#999), which emits looking lookups in + /// addition to the RAM-bus table write. For zero-init tables the looking + /// lengths are 0, so this matches the default table wiring exactly. + fn build_gkr_iop_circuit( + cb: &mut CircuitBuilder, + param: &ProgramParams, + ) -> Result<(Self::TableConfig, Option>), ZKVMError> { + let config = Self::construct_circuit(cb, param)?; + let r_len = cb.cs.r_expressions.len() + cb.cs.r_table_expressions.len(); + let w_len = cb.cs.w_expressions.len() + cb.cs.w_table_expressions.len(); + // logup lk table records include both `p` and `q`, hence `* 2`. + let lk_len = cb.cs.lk_expressions.len() + cb.cs.lk_table_expressions.len() * 2; + let zero_len = + cb.cs.assert_zero_expressions.len() + cb.cs.assert_zero_sumcheck_expressions.len(); + + let selector = cb.create_placeholder_structural_witin(|| "selector"); + let selector_type = SelectorType::Prefix(selector.expr()); + + // all groups share the same prefix selector + let (out_evals, mut chip) = ( + [ + // r_record + (0..r_len).collect_vec(), + // w_record + (r_len..r_len + w_len).collect_vec(), + // lk_record + (r_len + w_len..r_len + w_len + lk_len).collect_vec(), + // zero_record + (0..zero_len).collect_vec(), + ], + Chip::new_from_cb(cb), + ); + + // register selector to legacy constrain system + if r_len > 0 { + cb.cs.r_selector = Some(selector_type.clone()); + } + if w_len > 0 { + cb.cs.w_selector = Some(selector_type.clone()); + } + if lk_len > 0 { + cb.cs.lk_selector = Some(selector_type.clone()); + } + if zero_len > 0 { + cb.cs.zero_selector = Some(selector_type.clone()); + } + + let layer = Layer::from_circuit_builder(cb, Self::name(), out_evals); + chip.add_layer(layer); + + Ok((config, Some(chip.gkr_circuit()))) + } + fn generate_fixed_traces( _config: &Self::TableConfig, _num_fixed: usize, @@ -255,6 +312,29 @@ impl< num_witin, num_structural_witin, data, + None, + )?, + ) + } + + /// Used for non-zero-init tables (e.g. `HintsTable`) whose init range-check + /// lookups must be folded into `combined_lk_mlt`. The shared `LkMultiplicity` + /// records the per-limb u16 checks emitted by `construct_circuit` (#999). + fn assign_instances_with_lk_multiplicities( + config: &Self::TableConfig, + num_witin: usize, + num_structural_witin: usize, + lk_multiplicity: &mut LkMultiplicity, + data: &Self::WitnessInput<'_>, + ) -> Result, ZKVMError> { + // assume returned table is well-formed include padding + Ok( + >::assign_instances( + config, + num_witin, + num_structural_witin, + data, + Some(&*lk_multiplicity), )?, ) } diff --git a/ceno_zkvm/src/tables/ram/ram_impl.rs b/ceno_zkvm/src/tables/ram/ram_impl.rs index 39591d764..e09e68968 100644 --- a/ceno_zkvm/src/tables/ram/ram_impl.rs +++ b/ceno_zkvm/src/tables/ram/ram_impl.rs @@ -20,6 +20,7 @@ use crate::{ scheme::PublicValues, structs::ProgramParams, tables::ram::ram_circuit::DynVolatileRamTableConfigTrait, + witness::LkMultiplicity, }; use ff_ext::FieldInto; use multilinear_extensions::{Expression, Fixed, StructuralWitIn, ToExpr, WitIn}; @@ -182,6 +183,7 @@ impl DynVolatileRamTableInitCo num_witin: usize, num_structural_witin: usize, (final_mem, _pv, _num_instances): &(&[MemFinalRecord], &PublicValues, usize), + lk_multiplicity: Option<&LkMultiplicity>, ) -> Result<[RowMajorMatrix; 2], CircuitBuilderError> { if final_mem.is_empty() { return Ok([RowMajorMatrix::empty(), RowMajorMatrix::empty()]); @@ -226,10 +228,15 @@ impl DynVolatileRamTableInitCo // Assign value directly. set_val!(row, init_v[0], rec.init_value as u64); } else { - // Assign value limbs. + // Assign value limbs, recording the per-limb u16 + // range-check looked up in `construct_circuit` (#999). + let mut row_lkm = lk_multiplicity.cloned(); init_v.iter().enumerate().for_each(|(l, limb)| { let val = (rec.init_value >> (l * LIMB_BITS)) & LIMB_MASK; set_val!(row, limb, val as u64); + if let Some(m) = row_lkm.as_mut() { + m.assert_ux::(val as u64); + } }); } } @@ -285,6 +292,7 @@ impl DynVolatileRamTableInitCo num_witin: usize, num_structural_witin: usize, (final_mem, pv, num_instances): &(&[MemFinalRecord], &PublicValues, usize), + lk_multiplicity: Option<&LkMultiplicity>, ) -> Result<[RowMajorMatrix; 2], CircuitBuilderError> { // got some duplicated code segment to simplify parallel assignment flow let start_addr = DVRAM::dynamic_addr(&config.params, 0, pv); @@ -331,10 +339,15 @@ impl DynVolatileRamTableInitCo // Assign value directly. set_val!(row, init_v[0], rec.init_value as u64); } else { - // Assign value limbs. + // Assign value limbs, recording the per-limb u16 + // range-check looked up in `construct_circuit` (#999). + let mut row_lkm = lk_multiplicity.cloned(); init_v.iter().enumerate().for_each(|(l, limb)| { let val = (rec.init_value >> (l * LIMB_BITS)) & LIMB_MASK; set_val!(row, limb, val as u64); + if let Some(m) = row_lkm.as_mut() { + m.assert_ux::(val as u64); + } }); } } @@ -397,9 +410,27 @@ impl DynVolatileRamTableConfig let (init_expr, init_v) = if DVRAM::ZERO_INIT { (vec![Expression::ZERO; DVRAM::V_LIMBS], None) } else { + // Non-zero-init tables (e.g. `HintsTable`) take their initial content + // from prover-supplied witnesses. Without a range check a malicious + // prover could choose non-canonical limbs (>= 2^LIMB_BITS): the load + // path reads memory via `UInt::new_unchecked`, so a non-canonical word + // would propagate to a register completely unconstrained (#999). Bind + // every limb to `LIMB_BITS` so the reconstructed word is a canonical + // u32. This assumes the value is laid out as `LIMB_BITS`-wide limbs, + // matching the witness-assignment limb path below. + assert!( + DVRAM::V_LIMBS > 1, + "non-zero-init RAM table must use a multi-limb (LIMB_BITS) layout for the init range check" + ); let init_v = (0..DVRAM::V_LIMBS) .map(|i| cb.create_witin(|| format!("init_v_limb_{i}"))) .collect::>(); + for (i, limb) in init_v.iter().enumerate() { + cb.assert_ux::<_, _, LIMB_BITS>( + || format!("init_v_limb_{i}_in_u{LIMB_BITS}"), + limb.expr(), + )?; + } (init_v.iter().map(|v| v.expr()).collect_vec(), Some(init_v)) }; @@ -434,6 +465,7 @@ impl DynVolatileRamTableConfig num_witin: usize, num_structural_witin: usize, data: &(&[MemFinalRecord], &PublicValues, usize), + lk_multiplicity: Option<&LkMultiplicity>, ) -> Result<[RowMajorMatrix; 2], CircuitBuilderError> { let (final_mem, _, _) = &data; if final_mem.is_empty() { @@ -441,9 +473,21 @@ impl DynVolatileRamTableConfig } assert_eq!(num_structural_witin, 2); if DVRAM::dynamic_length_instance().is_some() || DVRAM::DYNAMIC_OFFSET { - Self::assign_instances_dynamic(config, num_witin, num_structural_witin, data) + Self::assign_instances_dynamic( + config, + num_witin, + num_structural_witin, + data, + lk_multiplicity, + ) } else { - Self::assign_instances(config, num_witin, num_structural_witin, data) + Self::assign_instances( + config, + num_witin, + num_structural_witin, + data, + lk_multiplicity, + ) } } } @@ -682,4 +726,120 @@ mod tests { assert_eq!(addr_padded_view.get_base_field_vec(), expected) } + + /// Soundness regression for #999: a malicious prover must not be able to put + /// a non-canonical (>= 2^LIMB_BITS) limb into a `HintsTable` init word. + /// + /// * The honest witness (canonical limbs) satisfies every range lookup. + /// * Forcing any single init limb to `2^LIMB_BITS` makes the corresponding + /// `init_v_limb_{i}_in_u16` lookup query fall outside the u16 range table, + /// so `MockProver` rejects the tampered witness. Without the fix this row + /// would verify and the load path (`UInt::new_unchecked`) would forward the + /// non-canonical word to a register unconstrained. + #[test] + fn test_hint_init_rejects_non_canonical_limb() { + use crate::{ + instructions::riscv::constants::{LIMB_BITS, UINT_LIMBS}, + scheme::mock_prover::MockProver, + }; + use ff_ext::FromUniformBytes; + use rand::thread_rng; + + let mut cs = ConstraintSystem::::new(|| "riscv"); + let mut cb = CircuitBuilder::new(&mut cs); + let (config, _) = + HintsInitCircuit::build_gkr_iop_circuit(&mut cb, &ProgramParams::default()).unwrap(); + let num_witin = cb.cs.num_witin as usize; + let num_structural = cb.cs.num_structural_witin as usize; + + let def_params = ProgramParams::default(); + let empty_mlt = LkMultiplicity::default().into_finalize_result(); + let pv = PublicValues { + hint_start_addr: CENO_PLATFORM.hints.start, + ..Default::default() + }; + + // Four canonical hint words (power of two => the real rows fill the table + // without padding). Both 16-bit limbs of every `init_value` are < 2^16. + let n = 4usize; + let records = (0..n) + .map(|i| MemFinalRecord { + ram_type: RAMType::Memory, + addr: HintsTable::dynamic_addr(&def_params, i, &pv), + cycle: 0, + value: 0xabcd_0000 + i as u32, + init_value: 0xabcd_0000 + i as u32, + }) + .collect_vec(); + let input = (&records[..], &pv, n); + + // Concrete challenge so MockProver routes through `run_with_challenge`, + // which keeps the structural witnesses; the prefix `selector` gates the + // per-row range lookups. + let mut rng = thread_rng(); + let challenge = [E::random(&mut rng), E::random(&mut rng)]; + + // Honest witness: all per-limb u16 range lookups are satisfied. + let [mut honest_w, mut honest_sw] = HintsInitCircuit::::assign_instances( + &config, + num_witin, + num_structural, + &empty_mlt.0, + &input, + ) + .unwrap(); + honest_w.padding_by_strategy(); + honest_sw.padding_by_strategy(); + MockProver::::assert_satisfied( + &cb, + &honest_w + .to_mles() + .into_iter() + .map(|v| v.into()) + .collect_vec(), + &honest_sw + .to_mles() + .into_iter() + .map(|v| v.into()) + .collect_vec(), + &[], + Some(challenge), + None, + ); + + // Tamper each limb in turn: only the #999 range check can reject it. + for bad_limb in 0..UINT_LIMBS { + let col = cb + .cs + .witin_namespace_map + .iter() + .position(|name| name.contains(&format!("init_v_limb_{bad_limb}"))) + .unwrap_or_else(|| panic!("missing init_v_limb_{bad_limb} column")); + + let [mut bad_w, mut bad_sw] = HintsInitCircuit::::assign_instances( + &config, + num_witin, + num_structural, + &empty_mlt.0, + &input, + ) + .unwrap(); + // Corrupt the first (real) instance row to a non-canonical limb. + bad_w.iter_mut().next().unwrap()[col] = F::from_canonical_u64(1 << LIMB_BITS); + bad_w.padding_by_strategy(); + bad_sw.padding_by_strategy(); + + let expected = format!("init_v_limb_{bad_limb}_in_u{LIMB_BITS}"); + MockProver::::assert_with_expected_errors( + &cb, + &[], + &bad_w.to_mles().into_iter().map(|v| v.into()).collect_vec(), + &bad_sw.to_mles().into_iter().map(|v| v.into()).collect_vec(), + &[], + &[expected.as_str()], + Some(challenge), + None, + ); + } + } }