From 2ac7ea5b3cb8724b41b5e5e94ebf7cef0a32b037 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Sun, 31 May 2026 19:14:06 +0700 Subject: [PATCH] execution_tape: add aggregate ABI shape metadata Keep ValueType::Agg as the runtime ABI kind, but add optional FunctionArgAggShapeEntry metadata to the Program container so frontends can refine aggregate arguments for verification. ProgramBuilder now accepts FunctionAbi in checked function entry points while preserving FunctionSig call sites via Into. Seed verifier aggregate facts from ABI argument shapes at function entry, including tuple element types, struct type ids, and array element ids. This lets closure bodies project typed values from the hidden environment argument without routing internal capture reads through host calls. Migration: existing FunctionSig-based builder calls continue to compile; code that needs typed aggregate arguments can use FunctionSig::with_arg_agg_shape or FunctionSig::closure_body_with_env_shape. --- execution_tape/src/asm.rs | 231 ++++++++++++++++++++++++++++-- execution_tape/src/program.rs | 231 ++++++++++++++++++++++++++++++ execution_tape/src/verifier.rs | 252 ++++++++++++++++++++++++++++++++- execution_tape/src/vm.rs | 46 +++++- 4 files changed, 739 insertions(+), 21 deletions(-) diff --git a/execution_tape/src/asm.rs b/execution_tape/src/asm.rs index 7f0e26a..94ed4e8 100644 --- a/execution_tape/src/asm.rs +++ b/execution_tape/src/asm.rs @@ -18,9 +18,10 @@ use crate::format::{write_sleb128_i64, write_uleb128_u64}; use crate::host::HostSig; use crate::opcode::Opcode; use crate::program::{ - ByteRange, CallSigEntry, CallSigId, Const, ConstId, ElemTypeId, FunctionDef, FunctionNameEntry, - HostSigDef, HostSigId, HostSymbol, LabelNameEntry, Program, SpanEntry, SpanId, StructTypeDef, - SymbolId, TypeId, TypeTableDef, ValueType, + AggShape, ByteRange, CallSigEntry, CallSigId, Const, ConstId, ElemTypeId, + FunctionArgAggShapeEntry, FunctionDef, FunctionNameEntry, HostSigDef, HostSigId, HostSymbol, + LabelNameEntry, Program, SpanEntry, SpanId, StructTypeDef, SymbolId, TypeId, TypeTableDef, + ValueType, }; use crate::value::Decimal; use crate::value::FuncId; @@ -262,6 +263,85 @@ impl FunctionSig { ret_types: call_ret_types.to_vec(), } } + + /// Attaches aggregate shape metadata to argument `arg`. + /// + /// The argument's signature type remains [`ValueType::Agg`]. The shape is verifier metadata + /// used to type-check aggregate projections from ABI-provided values. + #[must_use] + pub fn with_arg_agg_shape(self, arg: u32, shape: AggShape) -> FunctionAbi { + let mut abi = FunctionAbi::from(self); + abi.set_arg_agg_shape(arg, shape); + abi + } + + /// Creates a closure body ABI and attaches shape metadata to the injected environment argument. + /// + /// This is the typed-shape counterpart to [`FunctionSig::closure_body`]. It preserves the + /// caller-visible call signature while telling the verifier what shape the hidden closure + /// environment argument has inside the body. + #[must_use] + pub fn closure_body_with_env_shape( + call_arg_types: &[ValueType], + call_ret_types: &[ValueType], + env_shape: AggShape, + ) -> FunctionAbi { + Self::closure_body(call_arg_types, call_ret_types).with_arg_agg_shape(0, env_shape) + } +} + +/// Function ABI metadata used by [`ProgramBuilder`]. +/// +/// The signature carries runtime value kinds, while `arg_agg_shapes` carries optional verifier +/// refinements for arguments whose runtime kind is [`ValueType::Agg`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionAbi { + /// Runtime function signature. + pub sig: FunctionSig, + /// Optional aggregate shape metadata by argument index. + /// + /// Entries beyond the signature arity are preserved so checked builders can report a verifier + /// error instead of silently dropping caller intent. + pub arg_agg_shapes: Vec>, +} + +impl FunctionAbi { + /// Creates ABI metadata from a runtime function signature with no aggregate shape refinements. + #[must_use] + pub fn new(sig: FunctionSig) -> Self { + let arg_agg_shapes = vec![None; sig.arg_types.len()]; + Self { + sig, + arg_agg_shapes, + } + } + + /// Attaches aggregate shape metadata to argument `arg`. + pub fn set_arg_agg_shape(&mut self, arg: u32, shape: AggShape) { + let arg = arg as usize; + if self.arg_agg_shapes.len() <= arg { + self.arg_agg_shapes.resize_with(arg + 1, || None); + } + self.arg_agg_shapes[arg] = Some(shape); + } +} + +impl From for FunctionAbi { + fn from(sig: FunctionSig) -> Self { + Self::new(sig) + } +} + +impl From<&FunctionSig> for FunctionAbi { + fn from(sig: &FunctionSig) -> Self { + Self::new(sig.clone()) + } +} + +impl From<&Self> for FunctionAbi { + fn from(abi: &Self) -> Self { + abi.clone() + } } /// Convenience builder for constructing small [`Program`]s. @@ -306,6 +386,7 @@ pub struct ProgramBuilder { host_sigs: Vec, types: TypeTableDef, functions: Vec, + function_arg_agg_shapes: Vec, program_name: Option, function_names: Vec, labels: Vec, @@ -566,11 +647,68 @@ impl ProgramBuilder { id } + fn store_function_arg_agg_shapes( + &mut self, + func: FuncId, + arg_agg_shapes: Vec>, + ) { + self.function_arg_agg_shapes.retain(|e| e.func != func.0); + for (arg, shape) in arg_agg_shapes.into_iter().enumerate() { + let Some(shape) = shape else { + continue; + }; + self.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: func.0, + arg: u32::try_from(arg).unwrap_or(u32::MAX), + shape, + }); + } + } + + /// Sets aggregate shape metadata for a function argument. + /// + /// This refines an argument whose declared type is [`ValueType::Agg`] so the verifier can + /// type-check aggregate projections from ABI-provided values. + pub fn set_function_arg_agg_shape( + &mut self, + func: FuncId, + arg: u32, + shape: AggShape, + ) -> Result<(), BuildError> { + let Some(def) = self.functions.get(func.0 as usize) else { + return Err(BuildError::BadFuncId { func: func.0 }); + }; + if (arg as usize) >= def.arg_types.len() { + let arg_count = u32::try_from(def.arg_types.len()).unwrap_or(u32::MAX); + return Err(BuildError::BadArgIndex { + func: func.0, + arg, + arg_count, + }); + } + if let Some(entry) = self + .function_arg_agg_shapes + .iter_mut() + .find(|e| e.func == func.0 && e.arg == arg) + { + entry.shape = shape; + } else { + self.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: func.0, + arg, + shape, + }); + } + Ok(()) + } + /// Declares a function signature and returns its [`FuncId`]. /// /// This is useful when assembling mutually recursive or out-of-order functions: you can /// declare all functions up front, then reference them by [`FuncId`] in [`Asm::call`]. - pub fn declare_function(&mut self, sig: FunctionSig) -> FuncId { + pub fn declare_function(&mut self, abi: impl Into) -> FuncId { + let abi = abi.into(); + let sig = abi.sig; let reg_count = min_reg_count_for_arg_count(sig.arg_types.len()); let id = FuncId(u32::try_from(self.functions.len()).unwrap_or(u32::MAX)); self.functions.push(FunctionDef { @@ -580,6 +718,7 @@ impl ProgramBuilder { bytecode: Vec::new(), spans: Vec::new(), }); + self.store_function_arg_agg_shapes(id, abi.arg_agg_shapes); id } @@ -636,7 +775,13 @@ impl ProgramBuilder { /// /// This resolves labels and records the typed signature. Full verification (including host-call /// signature checks and cross-function call checks) is performed by `build_checked`. - pub fn push_function_checked(&mut self, a: Asm, sig: FunctionSig) -> Result { + pub fn push_function_checked( + &mut self, + a: Asm, + abi: impl Into, + ) -> Result { + let abi = abi.into(); + let sig = abi.sig; let reg_count = a.inferred_reg_count_for_args(sig.arg_types.len()); let parts = a.finish_parts()?; let id = FuncId(u32::try_from(self.functions.len()).unwrap_or(u32::MAX)); @@ -655,6 +800,7 @@ impl ProgramBuilder { name: sym, }); } + self.store_function_arg_agg_shapes(id, abi.arg_agg_shapes); Ok(id) } @@ -691,6 +837,7 @@ impl ProgramBuilder { p.program_name = self.program_name; p.function_names = self.function_names; p.labels = self.labels; + p.function_arg_agg_shapes = self.function_arg_agg_shapes; let func_count = p.functions.len(); let mut has_arg_names = vec![false; func_count]; @@ -957,34 +1104,36 @@ impl Asm { /// /// This constructs a tiny single-function [`Program`] wrapper and runs the verifier. It is /// intended as a quick sanity check for builder users. - pub fn finish_checked(self, sig: &FunctionSig) -> Result, AsmError> { - self.finish_checked_with(sig, &VerifyConfig::default()) + pub fn finish_checked(self, abi: impl Into) -> Result, AsmError> { + self.finish_checked_with(abi, &VerifyConfig::default()) } /// Finalizes, then verifies the resulting bytecode and span table under `sig`. - pub fn finish_checked_parts(self, sig: &FunctionSig) -> Result { - self.finish_checked_parts_with(sig, &VerifyConfig::default()) + pub fn finish_checked_parts(self, abi: impl Into) -> Result { + self.finish_checked_parts_with(abi, &VerifyConfig::default()) } /// Finalizes, then verifies the resulting bytecode under `sig` with a custom verifier config. pub fn finish_checked_with( self, - sig: &FunctionSig, + abi: impl Into, cfg: &VerifyConfig, ) -> Result, AsmError> { - Ok(self.finish_checked_parts_with(sig, cfg)?.bytecode) + Ok(self.finish_checked_parts_with(abi, cfg)?.bytecode) } /// Finalizes, then verifies the resulting bytecode and span table under `sig` with a custom /// verifier config. pub fn finish_checked_parts_with( self, - sig: &FunctionSig, + abi: impl Into, cfg: &VerifyConfig, ) -> Result { + let abi = abi.into(); + let sig = abi.sig; let reg_count = self.inferred_reg_count_for_args(sig.arg_types.len()); let parts = self.finish_parts()?; - let p = Program::new( + let mut p = Program::new( Vec::new(), Vec::new(), Vec::new(), @@ -997,6 +1146,16 @@ impl Asm { spans: parts.spans.clone(), }], ); + for (arg, shape) in abi.arg_agg_shapes.into_iter().enumerate() { + let Some(shape) = shape else { + continue; + }; + p.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: 0, + arg: u32::try_from(arg).unwrap_or(u32::MAX), + shape, + }); + } verify_program(&p, cfg)?; Ok(parts) } @@ -2300,6 +2459,52 @@ mod tests { ); } + #[test] + fn function_sig_closure_body_with_env_shape_marks_env_arg() { + let abi = FunctionSig::closure_body_with_env_shape( + &[ValueType::I64], + &[ValueType::I64], + AggShape::tuple(vec![Some(ValueType::I64)]), + ); + + assert_eq!( + abi.sig, + FunctionSig { + arg_types: vec![ValueType::Agg, ValueType::I64], + ret_types: vec![ValueType::I64], + } + ); + assert_eq!( + abi.arg_agg_shapes, + vec![Some(AggShape::tuple(vec![Some(ValueType::I64)])), None] + ); + } + + #[test] + fn program_builder_preserves_function_arg_agg_shapes() { + let mut pb = ProgramBuilder::new(); + let mut a = Asm::new(); + a.tuple_get(2, 1, 0); + a.ret(0, &[2]); + + let func = pb + .push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::Agg], + ret_types: vec![ValueType::I64], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![Some(ValueType::I64)])), + ) + .unwrap(); + + let program = pb.build_checked().unwrap(); + assert_eq!( + program.function_arg_agg_shape(func.0, 0), + Some(&AggShape::tuple(vec![Some(ValueType::I64)])) + ); + } + #[test] fn program_builder_build_verified_preserves_call_sigs_for_call_indirect() { let mut pb = ProgramBuilder::new(); diff --git a/execution_tape/src/program.rs b/execution_tape/src/program.rs index 27e2141..1e5b891 100644 --- a/execution_tape/src/program.rs +++ b/execution_tape/src/program.rs @@ -262,6 +262,12 @@ pub struct Program { pub spans: Vec, /// Program functions. pub functions: Vec, + /// Optional aggregate shape metadata for function arguments. + /// + /// Function signatures still use [`ValueType::Agg`] as the runtime ABI kind. These entries add + /// verifier-visible shape facts for specific aggregate arguments, such as a closure environment + /// argument that is known to be a tuple of captured values. + pub function_arg_agg_shapes: Vec, /// Optional program name. pub program_name: Option, /// Optional function-name entries. @@ -296,6 +302,17 @@ pub struct LabelNameEntry { pub name: SymbolId, } +/// Verifier-visible shape metadata for a function argument whose type is [`ValueType::Agg`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionArgAggShapeEntry { + /// Function index within the program. + pub func: u32, + /// Argument index within the function signature. + pub arg: u32, + /// Aggregate shape attached to the argument. + pub shape: AggShape, +} + /// A constant-pool entry stored in a compact representation. #[derive(Clone, Debug, PartialEq, Eq)] pub enum ConstEntry { @@ -369,6 +386,37 @@ pub enum ValueType { Closure, } +/// Verifier-visible aggregate shape metadata. +/// +/// This refines aggregate arguments without changing the runtime ABI kind: signatures still carry +/// [`ValueType::Agg`], while the shape lets the verifier type-check projections from known tuple, +/// struct, or array aggregate arguments. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum AggShape { + /// Tuple aggregate with optional per-element value types. + /// + /// `None` means the tuple element is known to exist but its value type is not known to the + /// verifier. + Tuple { + /// Element type metadata in tuple index order. + elems: Vec>, + }, + /// Struct aggregate with a type-table id. + Struct(TypeId), + /// Array aggregate with an element-type table id. + Array(ElemTypeId), +} + +impl AggShape { + /// Creates a tuple aggregate shape from element type metadata. + #[must_use] + pub fn tuple(elems: impl Into>>) -> Self { + Self::Tuple { + elems: elems.into(), + } + } +} + /// A host-call signature table entry. #[derive(Clone, Debug, PartialEq, Eq)] pub struct HostSigEntry { @@ -687,6 +735,7 @@ impl Program { bytecode_data, spans, functions: packed_functions, + function_arg_agg_shapes: Vec::new(), program_name: None, function_names: Vec::new(), labels: Vec::new(), @@ -717,6 +766,17 @@ impl Program { .and_then(|e| self.symbol_str(e.name).ok()) } + /// Returns aggregate shape metadata for function argument `arg`, if present. + /// + /// This metadata only refines arguments whose declared type is [`ValueType::Agg`]. + #[must_use] + pub fn function_arg_agg_shape(&self, func: u32, arg: u32) -> Option<&AggShape> { + self.function_arg_agg_shapes + .iter() + .find(|e| e.func == func && e.arg == arg) + .map(|e| &e.shape) + } + /// Returns the function input name for `func` and `arg`, if present. #[must_use] pub fn function_input_name(&self, func: u32, arg: u32) -> Option<&str> { @@ -918,6 +978,7 @@ impl Program { // 7 = function_sigs // 8 = host_sigs // 10 = call_sigs (optional) + // 11 = function_arg_agg_shapes (optional) let mut w = Writer::new(); w.write_bytes(MAGIC); w.write_u16_le(VERSION_MAJOR); @@ -1074,6 +1135,18 @@ impl Program { write_section(&mut w, SectionTag::CallSigs, payload.as_slice()); } + // function argument aggregate shape metadata section (optional) + if !self.function_arg_agg_shapes.is_empty() { + let mut payload = Writer::new(); + payload.write_uleb128_u64(self.function_arg_agg_shapes.len() as u64); + for e in &self.function_arg_agg_shapes { + payload.write_uleb128_u32(e.func); + payload.write_uleb128_u32(e.arg); + encode_agg_shape(&mut payload, &e.shape); + } + write_section(&mut w, SectionTag::FunctionArgAggShapes, payload.as_slice()); + } + if self.program_name.is_some() || !self.function_names.is_empty() || !self.labels.is_empty() { let mut payload = Writer::new(); @@ -1151,6 +1224,7 @@ enum SectionTag { HostSigs = 8, Names = 9, CallSigs = 10, + FunctionArgAggShapes = 11, } impl SectionTag { @@ -1166,6 +1240,7 @@ impl SectionTag { 8 => Some(Self::HostSigs), 9 => Some(Self::Names), 10 => Some(Self::CallSigs), + 11 => Some(Self::FunctionArgAggShapes), _ => None, } } @@ -1266,6 +1341,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result = Vec::new(); let mut bytecode_blobs: Vec> = Vec::new(); let mut span_tables: Vec> = Vec::new(); + let mut function_arg_agg_shapes: Vec = Vec::new(); let mut names: NamesDef = NamesDef::default(); let mut saw_symbols = false; @@ -1277,6 +1353,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result) -> Result { + if saw_function_arg_agg_shapes { + return Err(DecodeError::DuplicateSection); + } + saw_function_arg_agg_shapes = true; + function_arg_agg_shapes = decode_function_arg_agg_shapes(payload)?; + } Some(SectionTag::Names) => { if saw_names { return Err(DecodeError::DuplicateSection); @@ -1587,6 +1671,15 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result= func.arg_count { + return Err(DecodeError::OutOfBounds); + } + } + Ok(Program { symbols, symbol_data, @@ -1601,6 +1694,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result) -> Result { }) } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[repr(u8)] +enum AggShapeTag { + Tuple = 1, + Struct = 2, + Array = 3, +} + +impl AggShapeTag { + fn from_u8(v: u8) -> Result { + match v { + 1 => Ok(Self::Tuple), + 2 => Ok(Self::Struct), + 3 => Ok(Self::Array), + _ => Err(DecodeError::OutOfBounds), + } + } +} + +fn encode_agg_shape(w: &mut Writer, shape: &AggShape) { + match shape { + AggShape::Tuple { elems } => { + w.write_u8(AggShapeTag::Tuple as u8); + w.write_uleb128_u64(elems.len() as u64); + for elem in elems { + match elem { + Some(ty) => { + w.write_u8(1); + encode_value_type(w, *ty); + } + None => w.write_u8(0), + } + } + } + AggShape::Struct(type_id) => { + w.write_u8(AggShapeTag::Struct as u8); + w.write_uleb128_u32(type_id.0); + } + AggShape::Array(elem_type_id) => { + w.write_u8(AggShapeTag::Array as u8); + w.write_uleb128_u32(elem_type_id.0); + } + } +} + +fn decode_agg_shape(r: &mut Reader<'_>) -> Result { + Ok(match AggShapeTag::from_u8(r.read_u8()?)? { + AggShapeTag::Tuple => { + let elem_count = read_usize(r)?; + let mut elems = Vec::with_capacity(elem_count); + for _ in 0..elem_count { + let has_type = r.read_u8()?; + if has_type == 0 { + elems.push(None); + } else if has_type == 1 { + elems.push(Some(decode_value_type(r)?)); + } else { + return Err(DecodeError::OutOfBounds); + } + } + AggShape::Tuple { elems } + } + AggShapeTag::Struct => AggShape::Struct(TypeId(r.read_uleb128_u32()?)), + AggShapeTag::Array => AggShape::Array(ElemTypeId(r.read_uleb128_u32()?)), + }) +} + fn encode_types(w: &mut Writer, t: &TypeTable) { w.write_uleb128_u64(t.field_name_ranges.len() as u64); for name in &t.field_name_ranges { @@ -1956,6 +2117,25 @@ fn decode_call_sigs(payload: &[u8]) -> Result, DecodeError> Ok(out) } +fn decode_function_arg_agg_shapes( + payload: &[u8], +) -> Result, DecodeError> { + let mut r = Reader::new(payload); + let n = read_usize(&mut r)?; + let mut out = Vec::with_capacity(n); + for _ in 0..n { + out.push(FunctionArgAggShapeEntry { + func: r.read_uleb128_u32()?, + arg: r.read_uleb128_u32()?, + shape: decode_agg_shape(&mut r)?, + }); + } + if r.offset() != payload.len() { + return Err(DecodeError::OutOfBounds); + } + Ok(out) +} + #[derive(Clone, Debug, PartialEq, Eq)] struct FunctionTableEntry { arg_count: u32, @@ -2122,6 +2302,57 @@ mod tests { assert_eq!(back, p); } + #[test] + fn program_roundtrips_function_arg_agg_shapes() { + let mut p = Program::new( + vec![], + vec![], + vec![], + TypeTableDef { + structs: vec![StructTypeDef { + field_names: vec!["count".into()], + field_types: vec![ValueType::I64], + }], + array_elems: vec![ValueType::Bool], + }, + vec![FunctionDef { + arg_types: vec![ValueType::Agg, ValueType::Agg, ValueType::Agg], + ret_types: vec![], + reg_count: 4, + bytecode: vec![], + spans: vec![], + }], + ); + p.function_arg_agg_shapes = vec![ + FunctionArgAggShapeEntry { + func: 0, + arg: 0, + shape: AggShape::tuple(vec![Some(ValueType::I64), None]), + }, + FunctionArgAggShapeEntry { + func: 0, + arg: 1, + shape: AggShape::Struct(TypeId(0)), + }, + FunctionArgAggShapeEntry { + func: 0, + arg: 2, + shape: AggShape::Array(ElemTypeId(0)), + }, + ]; + + let bytes = p.encode(); + let tags = section_tags(&bytes); + assert!(tags.contains(&(SectionTag::FunctionArgAggShapes as u8))); + + let back = Program::decode(&bytes).unwrap(); + assert_eq!(back, p); + assert_eq!( + back.function_arg_agg_shape(0, 0), + Some(&AggShape::tuple(vec![Some(ValueType::I64), None])) + ); + } + #[test] fn program_roundtrips_with_function_input_and_output_names() { let mut p = Program::new( diff --git a/execution_tape/src/verifier.rs b/execution_tape/src/verifier.rs index 3745b20..d9621df 100644 --- a/execution_tape/src/verifier.rs +++ b/execution_tape/src/verifier.rs @@ -21,8 +21,8 @@ use crate::host::{SigHash, sig_hash_slices}; use crate::instr_operands; use crate::opcode::Opcode; use crate::program::{ - CallSigId, ConstEntry, ElemTypeId, Function, Program, SpanEntry, SpanId, SymbolId, TypeId, - ValueType, + AggShape, CallSigId, ConstEntry, ElemTypeId, Function, Program, SpanEntry, SpanId, SymbolId, + TypeId, ValueType, }; use crate::typed::{ AggReg, BoolReg, BytesReg, ClosureReg, DecimalReg, ExecDecoded, ExecFunc, ExecInstr, F64Reg, @@ -213,6 +213,52 @@ pub enum VerifyError { /// Function index within the program. func: u32, }, + /// Function argument aggregate-shape metadata references an unknown function. + FunctionArgAggShapeFuncOutOfBounds { + /// Function index referenced by the metadata entry. + func: u32, + }, + /// Function argument aggregate-shape metadata references an invalid argument index. + FunctionArgAggShapeArgOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + }, + /// Function argument aggregate-shape metadata was duplicated for the same argument. + FunctionArgAggShapeDuplicate { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + }, + /// Function argument aggregate-shape metadata was attached to a non-aggregate argument. + FunctionArgAggShapeOnNonAggArg { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Actual declared argument type. + actual: ValueType, + }, + /// Function argument aggregate-shape metadata references an unknown struct type. + FunctionArgAggShapeStructTypeOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Struct type id. + type_id: u32, + }, + /// Function argument aggregate-shape metadata references an unknown array element type. + FunctionArgAggShapeArrayElemTypeOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Element type id. + elem_type_id: u32, + }, /// A function uses a value type not yet supported by the verifier/runtime register model. UnsupportedValueType { /// Function index within the program. @@ -631,6 +677,46 @@ impl fmt::Display for VerifyError { Self::FunctionSigCountMismatch { func } => { write!(f, "function {func} signature count mismatch") } + Self::FunctionArgAggShapeFuncOutOfBounds { func } => { + write!( + f, + "function argument aggregate shape references unknown function {func}" + ) + } + Self::FunctionArgAggShapeArgOutOfBounds { func, arg } => { + write!( + f, + "function {func} aggregate shape arg index out of bounds: {arg}" + ) + } + Self::FunctionArgAggShapeDuplicate { func, arg } => { + write!( + f, + "function {func} aggregate shape is duplicated for arg {arg}" + ) + } + Self::FunctionArgAggShapeOnNonAggArg { func, arg, actual } => { + write!( + f, + "function {func} aggregate shape on non-Agg arg {arg} (got {actual:?})" + ) + } + Self::FunctionArgAggShapeStructTypeOutOfBounds { func, arg, type_id } => { + write!( + f, + "function {func} aggregate shape for arg {arg} references unknown struct type_id {type_id}" + ) + } + Self::FunctionArgAggShapeArrayElemTypeOutOfBounds { + func, + arg, + elem_type_id, + } => { + write!( + f, + "function {func} aggregate shape for arg {arg} references unknown array elem_type_id {elem_type_id}" + ) + } Self::UnsupportedValueType { func, value_type } => { write!( f, @@ -917,6 +1003,7 @@ pub fn verify_program(program: &Program, cfg: &VerifyConfig) -> Result<(), Verif verify_host_sigs(program)?; verify_call_sigs(program)?; verify_function_value_names(program)?; + verify_function_arg_agg_shapes(program)?; for (i, func) in program.functions.iter().enumerate() { let func_id = u32::try_from(i).unwrap_or(u32::MAX); @@ -933,6 +1020,7 @@ pub fn verify_program_with_lints( verify_host_sigs(program)?; verify_call_sigs(program)?; verify_function_value_names(program)?; + verify_function_arg_agg_shapes(program)?; let mut lints: Vec = Vec::new(); for (i, func) in program.functions.iter().enumerate() { @@ -951,6 +1039,7 @@ pub fn verify_program_owned( verify_host_sigs(&program)?; verify_call_sigs(&program)?; verify_function_value_names(&program)?; + verify_function_arg_agg_shapes(&program)?; let signature_cache = build_signature_cache(&program)?; let mut verified_functions: Vec = Vec::with_capacity(program.functions.len()); @@ -974,6 +1063,7 @@ pub fn verify_program_owned_with_lints( verify_host_sigs(&program)?; verify_call_sigs(&program)?; verify_function_value_names(&program)?; + verify_function_arg_agg_shapes(&program)?; let signature_cache = build_signature_cache(&program)?; let mut verified_functions: Vec = Vec::with_capacity(program.functions.len()); @@ -1150,6 +1240,71 @@ fn verify_function_value_names(program: &Program) -> Result<(), VerifyError> { Ok(()) } +fn verify_function_arg_agg_shapes(program: &Program) -> Result<(), VerifyError> { + let mut seen: Vec<(u32, u32)> = Vec::new(); + + for e in &program.function_arg_agg_shapes { + let Some(func) = program.functions.get(e.func as usize) else { + return Err(VerifyError::FunctionArgAggShapeFuncOutOfBounds { func: e.func }); + }; + let arg_types = func + .arg_types(program) + .map_err(|_| VerifyError::FunctionArgTypesOutOfBounds { func: e.func })?; + let Some(actual) = arg_types.get(e.arg as usize).copied() else { + return Err(VerifyError::FunctionArgAggShapeArgOutOfBounds { + func: e.func, + arg: e.arg, + }); + }; + if !seen + .iter() + .any(|&(func, arg)| func == e.func && arg == e.arg) + { + seen.push((e.func, e.arg)); + } else { + return Err(VerifyError::FunctionArgAggShapeDuplicate { + func: e.func, + arg: e.arg, + }); + } + if actual != ValueType::Agg { + return Err(VerifyError::FunctionArgAggShapeOnNonAggArg { + func: e.func, + arg: e.arg, + actual, + }); + } + match &e.shape { + AggShape::Tuple { .. } => {} + AggShape::Struct(type_id) => { + if program.types.structs.get(type_id.0 as usize).is_none() { + return Err(VerifyError::FunctionArgAggShapeStructTypeOutOfBounds { + func: e.func, + arg: e.arg, + type_id: type_id.0, + }); + } + } + AggShape::Array(elem_type_id) => { + if program + .types + .array_elems + .get(elem_type_id.0 as usize) + .is_none() + { + return Err(VerifyError::FunctionArgAggShapeArrayElemTypeOutOfBounds { + func: e.func, + arg: e.arg, + elem_type_id: elem_type_id.0, + }); + } + } + } + } + + Ok(()) +} + fn verify_host_sigs(program: &Program) -> Result<(), VerifyError> { for (i, hs) in program.host_sigs.iter().enumerate() { let host_sig = u32::try_from(i).unwrap_or(u32::MAX); @@ -1466,7 +1621,8 @@ fn verify_function_bytecode( } // Type analysis + validation. - let entry_types = initial_types(reg_count, arg_types); + let arg_agg_metas = initial_arg_agg_metas(program, func_id, arg_types); + let entry_types = initial_types(reg_count, arg_types, &arg_agg_metas); let (type_in, type_out) = compute_must_types( program, &blocks, @@ -2784,6 +2940,16 @@ enum AggMeta { Array(ElemTypeId), } +impl From<&AggShape> for AggMeta { + fn from(shape: &AggShape) -> Self { + match shape { + AggShape::Tuple { elems } => Self::Tuple(elems.clone()), + AggShape::Struct(type_id) => Self::Struct(*type_id), + AggShape::Array(elem_type_id) => Self::Array(*elem_type_id), + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum RegType { Uninit, @@ -2803,20 +2969,45 @@ struct TypeState { aggs: Vec>, } -fn initial_types(reg_count: usize, arg_types: &[ValueType]) -> TypeState { +fn initial_types( + reg_count: usize, + arg_types: &[ValueType], + arg_agg_metas: &[Option], +) -> TypeState { let mut values: Vec> = vec![Some(RegType::Uninit); reg_count]; - let aggs: Vec> = vec![None; reg_count]; + let mut aggs: Vec> = vec![None; reg_count]; if reg_count != 0 { values[0] = Some(RegType::Concrete(ValueType::Unit)); // effect token for (i, &t) in arg_types.iter().enumerate() { if 1 + i < reg_count { values[1 + i] = Some(RegType::Concrete(t)); + if t == ValueType::Agg { + aggs[1 + i] = arg_agg_metas.get(i).cloned().unwrap_or(None); + } } } } TypeState { values, aggs } } +fn initial_arg_agg_metas( + program: &Program, + func_id: u32, + arg_types: &[ValueType], +) -> Vec> { + let mut out = vec![None; arg_types.len()]; + for e in program + .function_arg_agg_shapes + .iter() + .filter(|e| e.func == func_id) + { + if let Some(slot) = out.get_mut(e.arg as usize) { + *slot = Some(AggMeta::from(&e.shape)); + } + } + out +} + fn meet_value(a: Option, b: Option) -> Option { match (a, b) { // Unknown/top (used for initialization) doesn't constrain the result. @@ -3951,8 +4142,8 @@ mod tests { use crate::asm::{BuildError, FunctionSig, ProgramBuilder}; use crate::opcode::Opcode; use crate::program::{ - ByteRange, CallSigEntry, Const, FunctionDef, HostSymbol, Program, SpanId, StructTypeDef, - TypeTableDef, ValueType, + AggShape, ByteRange, CallSigEntry, Const, FunctionDef, HostSymbol, Program, SpanId, + StructTypeDef, TypeTableDef, ValueType, }; use crate::value::FuncId; use alloc::vec; @@ -4959,6 +5150,53 @@ mod tests { pb.build_checked().unwrap(); } + #[test] + fn verifier_types_tuple_get_from_abi_arg_shape() { + let mut a = Asm::new(); + a.tuple_get(2, 1, 0); + a.i64_add(3, 2, 2); + a.ret(0, &[3]); + + let mut pb = ProgramBuilder::new(); + pb.push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::Agg], + ret_types: vec![ValueType::I64], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![Some(ValueType::I64)])), + ) + .unwrap(); + pb.build_checked().unwrap(); + } + + #[test] + fn verifier_rejects_abi_arg_shape_on_non_agg_arg() { + let mut a = Asm::new(); + a.ret(0, &[]); + + let mut pb = ProgramBuilder::new(); + pb.push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::I64], + ret_types: vec![], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![])), + ) + .unwrap(); + let p = pb.build(); + + assert_eq!( + verify_program(&p, &VerifyConfig::default()), + Err(VerifyError::FunctionArgAggShapeOnNonAggArg { + func: 0, + arg: 0, + actual: ValueType::I64, + }) + ); + } + #[test] fn verifier_rejects_tuple_get_oob_index() { let mut a = Asm::new(); diff --git a/execution_tape/src/vm.rs b/execution_tape/src/vm.rs index 914f2c0..d52ce50 100644 --- a/execution_tape/src/vm.rs +++ b/execution_tape/src/vm.rs @@ -3114,7 +3114,9 @@ mod tests { use crate::asm::{Asm, FunctionSig, ProgramBuilder}; use crate::bytecode::Instr; use crate::host::{AccessSink, HostContext, HostSig, ResourceKeyRef, SigHash}; - use crate::program::{ByteRange, CallSigEntry, FunctionDef, Program, TypeTableDef, ValueType}; + use crate::program::{ + AggShape, ByteRange, CallSigEntry, FunctionDef, Program, TypeTableDef, ValueType, + }; use crate::trace::{TraceMask, TraceOutcome, TraceSink}; use crate::value::AggType; use crate::verifier::{VerifyConfig, verify_program_owned}; @@ -3882,6 +3884,48 @@ mod tests { assert_eq!(out, vec![Value::U64(2)]); } + #[test] + fn vm_executes_closure_that_reads_typed_tuple_env() { + let mut pb = ProgramBuilder::new(); + let call_sig = pb.call_sig(&[ValueType::I64], &[ValueType::I64]); + + let mut entry = Asm::new(); + entry.const_i64(1, 40); + entry.tuple_new(2, &[1]); + entry.const_func(3, FuncId(1)); + entry.closure_new(4, 3, 2); + entry.const_i64(5, 2); + entry.call_indirect(0, call_sig, 4, 0, &[5], &[6]); + entry.ret(0, &[6]); + pb.push_function_checked( + entry, + FunctionSig { + arg_types: vec![], + ret_types: vec![ValueType::I64], + }, + ) + .unwrap(); + + let mut body = Asm::new(); + body.tuple_get(3, 1, 0); + body.i64_add(4, 3, 2); + body.ret(0, &[4]); + pb.push_function_checked( + body, + FunctionSig::closure_body_with_env_shape( + &[ValueType::I64], + &[ValueType::I64], + AggShape::tuple(vec![Some(ValueType::I64)]), + ), + ) + .unwrap(); + + let p = pb.build_verified().unwrap(); + let mut vm = Vm::new(TestHost, Limits::default()); + let out = vm.run(&p, FuncId(0), &[], TraceMask::NONE, None).unwrap(); + assert_eq!(out, vec![Value::I64(42)]); + } + #[test] fn vm_traps_call_indirect_on_signature_arity_mismatch() { let entry_bytecode = crate::bytecode::encode_instructions(&[