diff --git a/execution_tape/src/asm.rs b/execution_tape/src/asm.rs index 7f0e26a..94ed4e8 100644 --- a/execution_tape/src/asm.rs +++ b/execution_tape/src/asm.rs @@ -18,9 +18,10 @@ use crate::format::{write_sleb128_i64, write_uleb128_u64}; use crate::host::HostSig; use crate::opcode::Opcode; use crate::program::{ - ByteRange, CallSigEntry, CallSigId, Const, ConstId, ElemTypeId, FunctionDef, FunctionNameEntry, - HostSigDef, HostSigId, HostSymbol, LabelNameEntry, Program, SpanEntry, SpanId, StructTypeDef, - SymbolId, TypeId, TypeTableDef, ValueType, + AggShape, ByteRange, CallSigEntry, CallSigId, Const, ConstId, ElemTypeId, + FunctionArgAggShapeEntry, FunctionDef, FunctionNameEntry, HostSigDef, HostSigId, HostSymbol, + LabelNameEntry, Program, SpanEntry, SpanId, StructTypeDef, SymbolId, TypeId, TypeTableDef, + ValueType, }; use crate::value::Decimal; use crate::value::FuncId; @@ -262,6 +263,85 @@ impl FunctionSig { ret_types: call_ret_types.to_vec(), } } + + /// Attaches aggregate shape metadata to argument `arg`. + /// + /// The argument's signature type remains [`ValueType::Agg`]. The shape is verifier metadata + /// used to type-check aggregate projections from ABI-provided values. + #[must_use] + pub fn with_arg_agg_shape(self, arg: u32, shape: AggShape) -> FunctionAbi { + let mut abi = FunctionAbi::from(self); + abi.set_arg_agg_shape(arg, shape); + abi + } + + /// Creates a closure body ABI and attaches shape metadata to the injected environment argument. + /// + /// This is the typed-shape counterpart to [`FunctionSig::closure_body`]. It preserves the + /// caller-visible call signature while telling the verifier what shape the hidden closure + /// environment argument has inside the body. + #[must_use] + pub fn closure_body_with_env_shape( + call_arg_types: &[ValueType], + call_ret_types: &[ValueType], + env_shape: AggShape, + ) -> FunctionAbi { + Self::closure_body(call_arg_types, call_ret_types).with_arg_agg_shape(0, env_shape) + } +} + +/// Function ABI metadata used by [`ProgramBuilder`]. +/// +/// The signature carries runtime value kinds, while `arg_agg_shapes` carries optional verifier +/// refinements for arguments whose runtime kind is [`ValueType::Agg`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionAbi { + /// Runtime function signature. + pub sig: FunctionSig, + /// Optional aggregate shape metadata by argument index. + /// + /// Entries beyond the signature arity are preserved so checked builders can report a verifier + /// error instead of silently dropping caller intent. + pub arg_agg_shapes: Vec>, +} + +impl FunctionAbi { + /// Creates ABI metadata from a runtime function signature with no aggregate shape refinements. + #[must_use] + pub fn new(sig: FunctionSig) -> Self { + let arg_agg_shapes = vec![None; sig.arg_types.len()]; + Self { + sig, + arg_agg_shapes, + } + } + + /// Attaches aggregate shape metadata to argument `arg`. + pub fn set_arg_agg_shape(&mut self, arg: u32, shape: AggShape) { + let arg = arg as usize; + if self.arg_agg_shapes.len() <= arg { + self.arg_agg_shapes.resize_with(arg + 1, || None); + } + self.arg_agg_shapes[arg] = Some(shape); + } +} + +impl From for FunctionAbi { + fn from(sig: FunctionSig) -> Self { + Self::new(sig) + } +} + +impl From<&FunctionSig> for FunctionAbi { + fn from(sig: &FunctionSig) -> Self { + Self::new(sig.clone()) + } +} + +impl From<&Self> for FunctionAbi { + fn from(abi: &Self) -> Self { + abi.clone() + } } /// Convenience builder for constructing small [`Program`]s. @@ -306,6 +386,7 @@ pub struct ProgramBuilder { host_sigs: Vec, types: TypeTableDef, functions: Vec, + function_arg_agg_shapes: Vec, program_name: Option, function_names: Vec, labels: Vec, @@ -566,11 +647,68 @@ impl ProgramBuilder { id } + fn store_function_arg_agg_shapes( + &mut self, + func: FuncId, + arg_agg_shapes: Vec>, + ) { + self.function_arg_agg_shapes.retain(|e| e.func != func.0); + for (arg, shape) in arg_agg_shapes.into_iter().enumerate() { + let Some(shape) = shape else { + continue; + }; + self.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: func.0, + arg: u32::try_from(arg).unwrap_or(u32::MAX), + shape, + }); + } + } + + /// Sets aggregate shape metadata for a function argument. + /// + /// This refines an argument whose declared type is [`ValueType::Agg`] so the verifier can + /// type-check aggregate projections from ABI-provided values. + pub fn set_function_arg_agg_shape( + &mut self, + func: FuncId, + arg: u32, + shape: AggShape, + ) -> Result<(), BuildError> { + let Some(def) = self.functions.get(func.0 as usize) else { + return Err(BuildError::BadFuncId { func: func.0 }); + }; + if (arg as usize) >= def.arg_types.len() { + let arg_count = u32::try_from(def.arg_types.len()).unwrap_or(u32::MAX); + return Err(BuildError::BadArgIndex { + func: func.0, + arg, + arg_count, + }); + } + if let Some(entry) = self + .function_arg_agg_shapes + .iter_mut() + .find(|e| e.func == func.0 && e.arg == arg) + { + entry.shape = shape; + } else { + self.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: func.0, + arg, + shape, + }); + } + Ok(()) + } + /// Declares a function signature and returns its [`FuncId`]. /// /// This is useful when assembling mutually recursive or out-of-order functions: you can /// declare all functions up front, then reference them by [`FuncId`] in [`Asm::call`]. - pub fn declare_function(&mut self, sig: FunctionSig) -> FuncId { + pub fn declare_function(&mut self, abi: impl Into) -> FuncId { + let abi = abi.into(); + let sig = abi.sig; let reg_count = min_reg_count_for_arg_count(sig.arg_types.len()); let id = FuncId(u32::try_from(self.functions.len()).unwrap_or(u32::MAX)); self.functions.push(FunctionDef { @@ -580,6 +718,7 @@ impl ProgramBuilder { bytecode: Vec::new(), spans: Vec::new(), }); + self.store_function_arg_agg_shapes(id, abi.arg_agg_shapes); id } @@ -636,7 +775,13 @@ impl ProgramBuilder { /// /// This resolves labels and records the typed signature. Full verification (including host-call /// signature checks and cross-function call checks) is performed by `build_checked`. - pub fn push_function_checked(&mut self, a: Asm, sig: FunctionSig) -> Result { + pub fn push_function_checked( + &mut self, + a: Asm, + abi: impl Into, + ) -> Result { + let abi = abi.into(); + let sig = abi.sig; let reg_count = a.inferred_reg_count_for_args(sig.arg_types.len()); let parts = a.finish_parts()?; let id = FuncId(u32::try_from(self.functions.len()).unwrap_or(u32::MAX)); @@ -655,6 +800,7 @@ impl ProgramBuilder { name: sym, }); } + self.store_function_arg_agg_shapes(id, abi.arg_agg_shapes); Ok(id) } @@ -691,6 +837,7 @@ impl ProgramBuilder { p.program_name = self.program_name; p.function_names = self.function_names; p.labels = self.labels; + p.function_arg_agg_shapes = self.function_arg_agg_shapes; let func_count = p.functions.len(); let mut has_arg_names = vec![false; func_count]; @@ -957,34 +1104,36 @@ impl Asm { /// /// This constructs a tiny single-function [`Program`] wrapper and runs the verifier. It is /// intended as a quick sanity check for builder users. - pub fn finish_checked(self, sig: &FunctionSig) -> Result, AsmError> { - self.finish_checked_with(sig, &VerifyConfig::default()) + pub fn finish_checked(self, abi: impl Into) -> Result, AsmError> { + self.finish_checked_with(abi, &VerifyConfig::default()) } /// Finalizes, then verifies the resulting bytecode and span table under `sig`. - pub fn finish_checked_parts(self, sig: &FunctionSig) -> Result { - self.finish_checked_parts_with(sig, &VerifyConfig::default()) + pub fn finish_checked_parts(self, abi: impl Into) -> Result { + self.finish_checked_parts_with(abi, &VerifyConfig::default()) } /// Finalizes, then verifies the resulting bytecode under `sig` with a custom verifier config. pub fn finish_checked_with( self, - sig: &FunctionSig, + abi: impl Into, cfg: &VerifyConfig, ) -> Result, AsmError> { - Ok(self.finish_checked_parts_with(sig, cfg)?.bytecode) + Ok(self.finish_checked_parts_with(abi, cfg)?.bytecode) } /// Finalizes, then verifies the resulting bytecode and span table under `sig` with a custom /// verifier config. pub fn finish_checked_parts_with( self, - sig: &FunctionSig, + abi: impl Into, cfg: &VerifyConfig, ) -> Result { + let abi = abi.into(); + let sig = abi.sig; let reg_count = self.inferred_reg_count_for_args(sig.arg_types.len()); let parts = self.finish_parts()?; - let p = Program::new( + let mut p = Program::new( Vec::new(), Vec::new(), Vec::new(), @@ -997,6 +1146,16 @@ impl Asm { spans: parts.spans.clone(), }], ); + for (arg, shape) in abi.arg_agg_shapes.into_iter().enumerate() { + let Some(shape) = shape else { + continue; + }; + p.function_arg_agg_shapes.push(FunctionArgAggShapeEntry { + func: 0, + arg: u32::try_from(arg).unwrap_or(u32::MAX), + shape, + }); + } verify_program(&p, cfg)?; Ok(parts) } @@ -2300,6 +2459,52 @@ mod tests { ); } + #[test] + fn function_sig_closure_body_with_env_shape_marks_env_arg() { + let abi = FunctionSig::closure_body_with_env_shape( + &[ValueType::I64], + &[ValueType::I64], + AggShape::tuple(vec![Some(ValueType::I64)]), + ); + + assert_eq!( + abi.sig, + FunctionSig { + arg_types: vec![ValueType::Agg, ValueType::I64], + ret_types: vec![ValueType::I64], + } + ); + assert_eq!( + abi.arg_agg_shapes, + vec![Some(AggShape::tuple(vec![Some(ValueType::I64)])), None] + ); + } + + #[test] + fn program_builder_preserves_function_arg_agg_shapes() { + let mut pb = ProgramBuilder::new(); + let mut a = Asm::new(); + a.tuple_get(2, 1, 0); + a.ret(0, &[2]); + + let func = pb + .push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::Agg], + ret_types: vec![ValueType::I64], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![Some(ValueType::I64)])), + ) + .unwrap(); + + let program = pb.build_checked().unwrap(); + assert_eq!( + program.function_arg_agg_shape(func.0, 0), + Some(&AggShape::tuple(vec![Some(ValueType::I64)])) + ); + } + #[test] fn program_builder_build_verified_preserves_call_sigs_for_call_indirect() { let mut pb = ProgramBuilder::new(); diff --git a/execution_tape/src/program.rs b/execution_tape/src/program.rs index 27e2141..1e5b891 100644 --- a/execution_tape/src/program.rs +++ b/execution_tape/src/program.rs @@ -262,6 +262,12 @@ pub struct Program { pub spans: Vec, /// Program functions. pub functions: Vec, + /// Optional aggregate shape metadata for function arguments. + /// + /// Function signatures still use [`ValueType::Agg`] as the runtime ABI kind. These entries add + /// verifier-visible shape facts for specific aggregate arguments, such as a closure environment + /// argument that is known to be a tuple of captured values. + pub function_arg_agg_shapes: Vec, /// Optional program name. pub program_name: Option, /// Optional function-name entries. @@ -296,6 +302,17 @@ pub struct LabelNameEntry { pub name: SymbolId, } +/// Verifier-visible shape metadata for a function argument whose type is [`ValueType::Agg`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionArgAggShapeEntry { + /// Function index within the program. + pub func: u32, + /// Argument index within the function signature. + pub arg: u32, + /// Aggregate shape attached to the argument. + pub shape: AggShape, +} + /// A constant-pool entry stored in a compact representation. #[derive(Clone, Debug, PartialEq, Eq)] pub enum ConstEntry { @@ -369,6 +386,37 @@ pub enum ValueType { Closure, } +/// Verifier-visible aggregate shape metadata. +/// +/// This refines aggregate arguments without changing the runtime ABI kind: signatures still carry +/// [`ValueType::Agg`], while the shape lets the verifier type-check projections from known tuple, +/// struct, or array aggregate arguments. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum AggShape { + /// Tuple aggregate with optional per-element value types. + /// + /// `None` means the tuple element is known to exist but its value type is not known to the + /// verifier. + Tuple { + /// Element type metadata in tuple index order. + elems: Vec>, + }, + /// Struct aggregate with a type-table id. + Struct(TypeId), + /// Array aggregate with an element-type table id. + Array(ElemTypeId), +} + +impl AggShape { + /// Creates a tuple aggregate shape from element type metadata. + #[must_use] + pub fn tuple(elems: impl Into>>) -> Self { + Self::Tuple { + elems: elems.into(), + } + } +} + /// A host-call signature table entry. #[derive(Clone, Debug, PartialEq, Eq)] pub struct HostSigEntry { @@ -687,6 +735,7 @@ impl Program { bytecode_data, spans, functions: packed_functions, + function_arg_agg_shapes: Vec::new(), program_name: None, function_names: Vec::new(), labels: Vec::new(), @@ -717,6 +766,17 @@ impl Program { .and_then(|e| self.symbol_str(e.name).ok()) } + /// Returns aggregate shape metadata for function argument `arg`, if present. + /// + /// This metadata only refines arguments whose declared type is [`ValueType::Agg`]. + #[must_use] + pub fn function_arg_agg_shape(&self, func: u32, arg: u32) -> Option<&AggShape> { + self.function_arg_agg_shapes + .iter() + .find(|e| e.func == func && e.arg == arg) + .map(|e| &e.shape) + } + /// Returns the function input name for `func` and `arg`, if present. #[must_use] pub fn function_input_name(&self, func: u32, arg: u32) -> Option<&str> { @@ -918,6 +978,7 @@ impl Program { // 7 = function_sigs // 8 = host_sigs // 10 = call_sigs (optional) + // 11 = function_arg_agg_shapes (optional) let mut w = Writer::new(); w.write_bytes(MAGIC); w.write_u16_le(VERSION_MAJOR); @@ -1074,6 +1135,18 @@ impl Program { write_section(&mut w, SectionTag::CallSigs, payload.as_slice()); } + // function argument aggregate shape metadata section (optional) + if !self.function_arg_agg_shapes.is_empty() { + let mut payload = Writer::new(); + payload.write_uleb128_u64(self.function_arg_agg_shapes.len() as u64); + for e in &self.function_arg_agg_shapes { + payload.write_uleb128_u32(e.func); + payload.write_uleb128_u32(e.arg); + encode_agg_shape(&mut payload, &e.shape); + } + write_section(&mut w, SectionTag::FunctionArgAggShapes, payload.as_slice()); + } + if self.program_name.is_some() || !self.function_names.is_empty() || !self.labels.is_empty() { let mut payload = Writer::new(); @@ -1151,6 +1224,7 @@ enum SectionTag { HostSigs = 8, Names = 9, CallSigs = 10, + FunctionArgAggShapes = 11, } impl SectionTag { @@ -1166,6 +1240,7 @@ impl SectionTag { 8 => Some(Self::HostSigs), 9 => Some(Self::Names), 10 => Some(Self::CallSigs), + 11 => Some(Self::FunctionArgAggShapes), _ => None, } } @@ -1266,6 +1341,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result = Vec::new(); let mut bytecode_blobs: Vec> = Vec::new(); let mut span_tables: Vec> = Vec::new(); + let mut function_arg_agg_shapes: Vec = Vec::new(); let mut names: NamesDef = NamesDef::default(); let mut saw_symbols = false; @@ -1277,6 +1353,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result) -> Result { + if saw_function_arg_agg_shapes { + return Err(DecodeError::DuplicateSection); + } + saw_function_arg_agg_shapes = true; + function_arg_agg_shapes = decode_function_arg_agg_shapes(payload)?; + } Some(SectionTag::Names) => { if saw_names { return Err(DecodeError::DuplicateSection); @@ -1587,6 +1671,15 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result= func.arg_count { + return Err(DecodeError::OutOfBounds); + } + } + Ok(Program { symbols, symbol_data, @@ -1601,6 +1694,7 @@ fn decode_current(bytes: &[u8], mut r: Reader<'_>) -> Result) -> Result { }) } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[repr(u8)] +enum AggShapeTag { + Tuple = 1, + Struct = 2, + Array = 3, +} + +impl AggShapeTag { + fn from_u8(v: u8) -> Result { + match v { + 1 => Ok(Self::Tuple), + 2 => Ok(Self::Struct), + 3 => Ok(Self::Array), + _ => Err(DecodeError::OutOfBounds), + } + } +} + +fn encode_agg_shape(w: &mut Writer, shape: &AggShape) { + match shape { + AggShape::Tuple { elems } => { + w.write_u8(AggShapeTag::Tuple as u8); + w.write_uleb128_u64(elems.len() as u64); + for elem in elems { + match elem { + Some(ty) => { + w.write_u8(1); + encode_value_type(w, *ty); + } + None => w.write_u8(0), + } + } + } + AggShape::Struct(type_id) => { + w.write_u8(AggShapeTag::Struct as u8); + w.write_uleb128_u32(type_id.0); + } + AggShape::Array(elem_type_id) => { + w.write_u8(AggShapeTag::Array as u8); + w.write_uleb128_u32(elem_type_id.0); + } + } +} + +fn decode_agg_shape(r: &mut Reader<'_>) -> Result { + Ok(match AggShapeTag::from_u8(r.read_u8()?)? { + AggShapeTag::Tuple => { + let elem_count = read_usize(r)?; + let mut elems = Vec::with_capacity(elem_count); + for _ in 0..elem_count { + let has_type = r.read_u8()?; + if has_type == 0 { + elems.push(None); + } else if has_type == 1 { + elems.push(Some(decode_value_type(r)?)); + } else { + return Err(DecodeError::OutOfBounds); + } + } + AggShape::Tuple { elems } + } + AggShapeTag::Struct => AggShape::Struct(TypeId(r.read_uleb128_u32()?)), + AggShapeTag::Array => AggShape::Array(ElemTypeId(r.read_uleb128_u32()?)), + }) +} + fn encode_types(w: &mut Writer, t: &TypeTable) { w.write_uleb128_u64(t.field_name_ranges.len() as u64); for name in &t.field_name_ranges { @@ -1956,6 +2117,25 @@ fn decode_call_sigs(payload: &[u8]) -> Result, DecodeError> Ok(out) } +fn decode_function_arg_agg_shapes( + payload: &[u8], +) -> Result, DecodeError> { + let mut r = Reader::new(payload); + let n = read_usize(&mut r)?; + let mut out = Vec::with_capacity(n); + for _ in 0..n { + out.push(FunctionArgAggShapeEntry { + func: r.read_uleb128_u32()?, + arg: r.read_uleb128_u32()?, + shape: decode_agg_shape(&mut r)?, + }); + } + if r.offset() != payload.len() { + return Err(DecodeError::OutOfBounds); + } + Ok(out) +} + #[derive(Clone, Debug, PartialEq, Eq)] struct FunctionTableEntry { arg_count: u32, @@ -2122,6 +2302,57 @@ mod tests { assert_eq!(back, p); } + #[test] + fn program_roundtrips_function_arg_agg_shapes() { + let mut p = Program::new( + vec![], + vec![], + vec![], + TypeTableDef { + structs: vec![StructTypeDef { + field_names: vec!["count".into()], + field_types: vec![ValueType::I64], + }], + array_elems: vec![ValueType::Bool], + }, + vec![FunctionDef { + arg_types: vec![ValueType::Agg, ValueType::Agg, ValueType::Agg], + ret_types: vec![], + reg_count: 4, + bytecode: vec![], + spans: vec![], + }], + ); + p.function_arg_agg_shapes = vec![ + FunctionArgAggShapeEntry { + func: 0, + arg: 0, + shape: AggShape::tuple(vec![Some(ValueType::I64), None]), + }, + FunctionArgAggShapeEntry { + func: 0, + arg: 1, + shape: AggShape::Struct(TypeId(0)), + }, + FunctionArgAggShapeEntry { + func: 0, + arg: 2, + shape: AggShape::Array(ElemTypeId(0)), + }, + ]; + + let bytes = p.encode(); + let tags = section_tags(&bytes); + assert!(tags.contains(&(SectionTag::FunctionArgAggShapes as u8))); + + let back = Program::decode(&bytes).unwrap(); + assert_eq!(back, p); + assert_eq!( + back.function_arg_agg_shape(0, 0), + Some(&AggShape::tuple(vec![Some(ValueType::I64), None])) + ); + } + #[test] fn program_roundtrips_with_function_input_and_output_names() { let mut p = Program::new( diff --git a/execution_tape/src/verifier.rs b/execution_tape/src/verifier.rs index 3745b20..d9621df 100644 --- a/execution_tape/src/verifier.rs +++ b/execution_tape/src/verifier.rs @@ -21,8 +21,8 @@ use crate::host::{SigHash, sig_hash_slices}; use crate::instr_operands; use crate::opcode::Opcode; use crate::program::{ - CallSigId, ConstEntry, ElemTypeId, Function, Program, SpanEntry, SpanId, SymbolId, TypeId, - ValueType, + AggShape, CallSigId, ConstEntry, ElemTypeId, Function, Program, SpanEntry, SpanId, SymbolId, + TypeId, ValueType, }; use crate::typed::{ AggReg, BoolReg, BytesReg, ClosureReg, DecimalReg, ExecDecoded, ExecFunc, ExecInstr, F64Reg, @@ -213,6 +213,52 @@ pub enum VerifyError { /// Function index within the program. func: u32, }, + /// Function argument aggregate-shape metadata references an unknown function. + FunctionArgAggShapeFuncOutOfBounds { + /// Function index referenced by the metadata entry. + func: u32, + }, + /// Function argument aggregate-shape metadata references an invalid argument index. + FunctionArgAggShapeArgOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + }, + /// Function argument aggregate-shape metadata was duplicated for the same argument. + FunctionArgAggShapeDuplicate { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + }, + /// Function argument aggregate-shape metadata was attached to a non-aggregate argument. + FunctionArgAggShapeOnNonAggArg { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Actual declared argument type. + actual: ValueType, + }, + /// Function argument aggregate-shape metadata references an unknown struct type. + FunctionArgAggShapeStructTypeOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Struct type id. + type_id: u32, + }, + /// Function argument aggregate-shape metadata references an unknown array element type. + FunctionArgAggShapeArrayElemTypeOutOfBounds { + /// Function index within the program. + func: u32, + /// Argument index within the function signature. + arg: u32, + /// Element type id. + elem_type_id: u32, + }, /// A function uses a value type not yet supported by the verifier/runtime register model. UnsupportedValueType { /// Function index within the program. @@ -631,6 +677,46 @@ impl fmt::Display for VerifyError { Self::FunctionSigCountMismatch { func } => { write!(f, "function {func} signature count mismatch") } + Self::FunctionArgAggShapeFuncOutOfBounds { func } => { + write!( + f, + "function argument aggregate shape references unknown function {func}" + ) + } + Self::FunctionArgAggShapeArgOutOfBounds { func, arg } => { + write!( + f, + "function {func} aggregate shape arg index out of bounds: {arg}" + ) + } + Self::FunctionArgAggShapeDuplicate { func, arg } => { + write!( + f, + "function {func} aggregate shape is duplicated for arg {arg}" + ) + } + Self::FunctionArgAggShapeOnNonAggArg { func, arg, actual } => { + write!( + f, + "function {func} aggregate shape on non-Agg arg {arg} (got {actual:?})" + ) + } + Self::FunctionArgAggShapeStructTypeOutOfBounds { func, arg, type_id } => { + write!( + f, + "function {func} aggregate shape for arg {arg} references unknown struct type_id {type_id}" + ) + } + Self::FunctionArgAggShapeArrayElemTypeOutOfBounds { + func, + arg, + elem_type_id, + } => { + write!( + f, + "function {func} aggregate shape for arg {arg} references unknown array elem_type_id {elem_type_id}" + ) + } Self::UnsupportedValueType { func, value_type } => { write!( f, @@ -917,6 +1003,7 @@ pub fn verify_program(program: &Program, cfg: &VerifyConfig) -> Result<(), Verif verify_host_sigs(program)?; verify_call_sigs(program)?; verify_function_value_names(program)?; + verify_function_arg_agg_shapes(program)?; for (i, func) in program.functions.iter().enumerate() { let func_id = u32::try_from(i).unwrap_or(u32::MAX); @@ -933,6 +1020,7 @@ pub fn verify_program_with_lints( verify_host_sigs(program)?; verify_call_sigs(program)?; verify_function_value_names(program)?; + verify_function_arg_agg_shapes(program)?; let mut lints: Vec = Vec::new(); for (i, func) in program.functions.iter().enumerate() { @@ -951,6 +1039,7 @@ pub fn verify_program_owned( verify_host_sigs(&program)?; verify_call_sigs(&program)?; verify_function_value_names(&program)?; + verify_function_arg_agg_shapes(&program)?; let signature_cache = build_signature_cache(&program)?; let mut verified_functions: Vec = Vec::with_capacity(program.functions.len()); @@ -974,6 +1063,7 @@ pub fn verify_program_owned_with_lints( verify_host_sigs(&program)?; verify_call_sigs(&program)?; verify_function_value_names(&program)?; + verify_function_arg_agg_shapes(&program)?; let signature_cache = build_signature_cache(&program)?; let mut verified_functions: Vec = Vec::with_capacity(program.functions.len()); @@ -1150,6 +1240,71 @@ fn verify_function_value_names(program: &Program) -> Result<(), VerifyError> { Ok(()) } +fn verify_function_arg_agg_shapes(program: &Program) -> Result<(), VerifyError> { + let mut seen: Vec<(u32, u32)> = Vec::new(); + + for e in &program.function_arg_agg_shapes { + let Some(func) = program.functions.get(e.func as usize) else { + return Err(VerifyError::FunctionArgAggShapeFuncOutOfBounds { func: e.func }); + }; + let arg_types = func + .arg_types(program) + .map_err(|_| VerifyError::FunctionArgTypesOutOfBounds { func: e.func })?; + let Some(actual) = arg_types.get(e.arg as usize).copied() else { + return Err(VerifyError::FunctionArgAggShapeArgOutOfBounds { + func: e.func, + arg: e.arg, + }); + }; + if !seen + .iter() + .any(|&(func, arg)| func == e.func && arg == e.arg) + { + seen.push((e.func, e.arg)); + } else { + return Err(VerifyError::FunctionArgAggShapeDuplicate { + func: e.func, + arg: e.arg, + }); + } + if actual != ValueType::Agg { + return Err(VerifyError::FunctionArgAggShapeOnNonAggArg { + func: e.func, + arg: e.arg, + actual, + }); + } + match &e.shape { + AggShape::Tuple { .. } => {} + AggShape::Struct(type_id) => { + if program.types.structs.get(type_id.0 as usize).is_none() { + return Err(VerifyError::FunctionArgAggShapeStructTypeOutOfBounds { + func: e.func, + arg: e.arg, + type_id: type_id.0, + }); + } + } + AggShape::Array(elem_type_id) => { + if program + .types + .array_elems + .get(elem_type_id.0 as usize) + .is_none() + { + return Err(VerifyError::FunctionArgAggShapeArrayElemTypeOutOfBounds { + func: e.func, + arg: e.arg, + elem_type_id: elem_type_id.0, + }); + } + } + } + } + + Ok(()) +} + fn verify_host_sigs(program: &Program) -> Result<(), VerifyError> { for (i, hs) in program.host_sigs.iter().enumerate() { let host_sig = u32::try_from(i).unwrap_or(u32::MAX); @@ -1466,7 +1621,8 @@ fn verify_function_bytecode( } // Type analysis + validation. - let entry_types = initial_types(reg_count, arg_types); + let arg_agg_metas = initial_arg_agg_metas(program, func_id, arg_types); + let entry_types = initial_types(reg_count, arg_types, &arg_agg_metas); let (type_in, type_out) = compute_must_types( program, &blocks, @@ -2784,6 +2940,16 @@ enum AggMeta { Array(ElemTypeId), } +impl From<&AggShape> for AggMeta { + fn from(shape: &AggShape) -> Self { + match shape { + AggShape::Tuple { elems } => Self::Tuple(elems.clone()), + AggShape::Struct(type_id) => Self::Struct(*type_id), + AggShape::Array(elem_type_id) => Self::Array(*elem_type_id), + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum RegType { Uninit, @@ -2803,20 +2969,45 @@ struct TypeState { aggs: Vec>, } -fn initial_types(reg_count: usize, arg_types: &[ValueType]) -> TypeState { +fn initial_types( + reg_count: usize, + arg_types: &[ValueType], + arg_agg_metas: &[Option], +) -> TypeState { let mut values: Vec> = vec![Some(RegType::Uninit); reg_count]; - let aggs: Vec> = vec![None; reg_count]; + let mut aggs: Vec> = vec![None; reg_count]; if reg_count != 0 { values[0] = Some(RegType::Concrete(ValueType::Unit)); // effect token for (i, &t) in arg_types.iter().enumerate() { if 1 + i < reg_count { values[1 + i] = Some(RegType::Concrete(t)); + if t == ValueType::Agg { + aggs[1 + i] = arg_agg_metas.get(i).cloned().unwrap_or(None); + } } } } TypeState { values, aggs } } +fn initial_arg_agg_metas( + program: &Program, + func_id: u32, + arg_types: &[ValueType], +) -> Vec> { + let mut out = vec![None; arg_types.len()]; + for e in program + .function_arg_agg_shapes + .iter() + .filter(|e| e.func == func_id) + { + if let Some(slot) = out.get_mut(e.arg as usize) { + *slot = Some(AggMeta::from(&e.shape)); + } + } + out +} + fn meet_value(a: Option, b: Option) -> Option { match (a, b) { // Unknown/top (used for initialization) doesn't constrain the result. @@ -3951,8 +4142,8 @@ mod tests { use crate::asm::{BuildError, FunctionSig, ProgramBuilder}; use crate::opcode::Opcode; use crate::program::{ - ByteRange, CallSigEntry, Const, FunctionDef, HostSymbol, Program, SpanId, StructTypeDef, - TypeTableDef, ValueType, + AggShape, ByteRange, CallSigEntry, Const, FunctionDef, HostSymbol, Program, SpanId, + StructTypeDef, TypeTableDef, ValueType, }; use crate::value::FuncId; use alloc::vec; @@ -4959,6 +5150,53 @@ mod tests { pb.build_checked().unwrap(); } + #[test] + fn verifier_types_tuple_get_from_abi_arg_shape() { + let mut a = Asm::new(); + a.tuple_get(2, 1, 0); + a.i64_add(3, 2, 2); + a.ret(0, &[3]); + + let mut pb = ProgramBuilder::new(); + pb.push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::Agg], + ret_types: vec![ValueType::I64], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![Some(ValueType::I64)])), + ) + .unwrap(); + pb.build_checked().unwrap(); + } + + #[test] + fn verifier_rejects_abi_arg_shape_on_non_agg_arg() { + let mut a = Asm::new(); + a.ret(0, &[]); + + let mut pb = ProgramBuilder::new(); + pb.push_function_checked( + a, + FunctionSig { + arg_types: vec![ValueType::I64], + ret_types: vec![], + } + .with_arg_agg_shape(0, AggShape::tuple(vec![])), + ) + .unwrap(); + let p = pb.build(); + + assert_eq!( + verify_program(&p, &VerifyConfig::default()), + Err(VerifyError::FunctionArgAggShapeOnNonAggArg { + func: 0, + arg: 0, + actual: ValueType::I64, + }) + ); + } + #[test] fn verifier_rejects_tuple_get_oob_index() { let mut a = Asm::new(); diff --git a/execution_tape/src/vm.rs b/execution_tape/src/vm.rs index 914f2c0..d52ce50 100644 --- a/execution_tape/src/vm.rs +++ b/execution_tape/src/vm.rs @@ -3114,7 +3114,9 @@ mod tests { use crate::asm::{Asm, FunctionSig, ProgramBuilder}; use crate::bytecode::Instr; use crate::host::{AccessSink, HostContext, HostSig, ResourceKeyRef, SigHash}; - use crate::program::{ByteRange, CallSigEntry, FunctionDef, Program, TypeTableDef, ValueType}; + use crate::program::{ + AggShape, ByteRange, CallSigEntry, FunctionDef, Program, TypeTableDef, ValueType, + }; use crate::trace::{TraceMask, TraceOutcome, TraceSink}; use crate::value::AggType; use crate::verifier::{VerifyConfig, verify_program_owned}; @@ -3882,6 +3884,48 @@ mod tests { assert_eq!(out, vec![Value::U64(2)]); } + #[test] + fn vm_executes_closure_that_reads_typed_tuple_env() { + let mut pb = ProgramBuilder::new(); + let call_sig = pb.call_sig(&[ValueType::I64], &[ValueType::I64]); + + let mut entry = Asm::new(); + entry.const_i64(1, 40); + entry.tuple_new(2, &[1]); + entry.const_func(3, FuncId(1)); + entry.closure_new(4, 3, 2); + entry.const_i64(5, 2); + entry.call_indirect(0, call_sig, 4, 0, &[5], &[6]); + entry.ret(0, &[6]); + pb.push_function_checked( + entry, + FunctionSig { + arg_types: vec![], + ret_types: vec![ValueType::I64], + }, + ) + .unwrap(); + + let mut body = Asm::new(); + body.tuple_get(3, 1, 0); + body.i64_add(4, 3, 2); + body.ret(0, &[4]); + pb.push_function_checked( + body, + FunctionSig::closure_body_with_env_shape( + &[ValueType::I64], + &[ValueType::I64], + AggShape::tuple(vec![Some(ValueType::I64)]), + ), + ) + .unwrap(); + + let p = pb.build_verified().unwrap(); + let mut vm = Vm::new(TestHost, Limits::default()); + let out = vm.run(&p, FuncId(0), &[], TraceMask::NONE, None).unwrap(); + assert_eq!(out, vec![Value::I64(42)]); + } + #[test] fn vm_traps_call_indirect_on_signature_arity_mismatch() { let entry_bytecode = crate::bytecode::encode_instructions(&[