From 3276143f77ccc6dba0ba1c81cf3541ce7a36fac0 Mon Sep 17 00:00:00 2001 From: lyranowl Date: Sun, 10 Aug 2025 22:00:15 +0500 Subject: [PATCH 1/3] `feature`: implemented `string.pack`, `string.unpack`, `string.packsize` --- src/stdlib/string.rs | 197 +++++++++++++- src/stdlib/string/pack.rs | 495 ++++++++++++++++++++++++++++++++++ src/stdlib/string/packsize.rs | 115 ++++++++ src/stdlib/string/unpack.rs | 432 +++++++++++++++++++++++++++++ 4 files changed, 1238 insertions(+), 1 deletion(-) create mode 100644 src/stdlib/string/pack.rs create mode 100644 src/stdlib/string/packsize.rs create mode 100644 src/stdlib/string/unpack.rs diff --git a/src/stdlib/string.rs b/src/stdlib/string.rs index 556537c8..722a1f4f 100644 --- a/src/stdlib/string.rs +++ b/src/stdlib/string.rs @@ -1,4 +1,10 @@ -use crate::{Callback, CallbackReturn, Context, FromValue, String, Table, Value}; +use crate::{Callback, CallbackReturn, Context, Error, FromValue, IntoValue, String, Table, Value}; + +mod pack; +mod packsize; +mod unpack; + +use std::mem; pub fn load_string<'gc>(ctx: Context<'gc>) { let string = Table::new(&ctx); @@ -97,9 +103,198 @@ pub fn load_string<'gc>(ctx: Context<'gc>) { }), ); + string.set_field( + ctx, + "pack", + Callback::from_fn(&ctx, |ctx, _, mut stack| { + let fmt = stack.consume::(ctx)?; + + let fmt = fmt.to_str()?; + + let bytes = pack::process(fmt, ctx, &stack)?; + + stack.replace(ctx, ctx.intern(&bytes)); + + Ok(CallbackReturn::Return) + }), + ); + + string.set_field( + ctx, + "unpack", + Callback::from_fn(&ctx, |ctx, _, mut stack| { + let (fmt, s, init) = stack.consume::<(String, String, Option)>(ctx)?; + + let fmt = fmt.to_str()?; + let bytes = s.as_bytes(); + let init = init.unwrap_or(1); + + let start_pos = if init >= 0 { + init.saturating_sub(1) as usize + } else { + bytes.len().saturating_sub(init.unsigned_abs() as usize) + }; + + if start_pos > bytes.len() { + return Err("initial position out of string bounds" + .into_value(ctx) + .into()); + } + + let (values, position) = unpack::process(fmt, bytes, start_pos as u64, ctx)?; + + stack.replace(ctx, values); + stack.into_back(ctx, position as i64); + + Ok(CallbackReturn::Return) + }), + ); + + string.set_field( + ctx, + "packsize", + Callback::from_fn(&ctx, |ctx, _, mut stack| { + let fmt = stack.consume::(ctx)?; + let fmt = fmt.to_str()?; + + let total_size = packsize::process(fmt, ctx)?; + + stack.replace(ctx, total_size as i64); + Ok(CallbackReturn::Return) + }), + ); + + string.set_field( + ctx, + "rep", + Callback::from_fn(&ctx, |ctx, _, mut stack| { + let (s, n, sep) = stack.consume::<(String, i64, Option)>(ctx)?; + + if n <= 0 { + stack.replace(ctx, ctx.intern(b"")); + return Ok(CallbackReturn::Return); + } + + if n == 1 { + stack.replace(ctx, s); + return Ok(CallbackReturn::Return); + } + + let s = s.as_bytes(); + let n = n as usize; + let sep = sep.map(|s| s.as_bytes()).unwrap_or(b""); + + let s_total_len = s.len().checked_mul(n); + let sep_total_len = sep.len().checked_mul(n - 1); + + let required_cap = match (s_total_len, sep_total_len) { + (Some(s_total), Some(sep_total)) => s_total.checked_add(sep_total), + _ => None, + }; + + let capacity = required_cap + .ok_or_else(|| Error::from_value("resulting string too large".into_value(ctx)))?; + + let mut result = Vec::with_capacity(capacity); + result.extend_from_slice(s); + for _ in 1..n { + result.extend_from_slice(sep); + result.extend_from_slice(s); + } + + stack.replace(ctx, ctx.intern(&result)); + Ok(CallbackReturn::Return) + }), + ); + ctx.set_global("string", string); } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Endianness { + Little, + Big, + Native, +} + +impl Default for Endianness { + fn default() -> Self { + Endianness::Native + } +} + +#[derive(Debug, Clone, Copy)] +struct FormatState { + endianness: Endianness, + max_alignment: usize, +} + +impl Default for FormatState { + fn default() -> Self { + FormatState { + endianness: Endianness::default(), + max_alignment: 1, + } + } +} + +fn parse_optional_int( + chars: &mut std::iter::Peekable, + max_val: usize, +) -> Result, std::string::String> { + let mut n_str = std::string::String::new(); + while let Some(c) = chars.peek() { + if c.is_ascii_digit() { + n_str.push(*c); + chars.next(); + } else { + break; + } + } + + if n_str.is_empty() { + Ok(None) + } else { + let n = n_str + .parse::() + .map_err(|_| format!("invalid number '{}' in format string", n_str))?; + if n == 0 || n > max_val { + Err(format!("number '{}' out of range [1, {}]", n, max_val)) + } else { + Ok(Some(n)) + } + } +} + +fn calculate_padding(current_pos: usize, data_size: usize, max_alignment: usize) -> usize { + if max_alignment == 0 || data_size == 0 { + return 0; + } + let alignment = std::cmp::min(data_size, max_alignment); + if alignment == 0 || !alignment.is_power_of_two() { + return 0; + } + (alignment - (current_pos % alignment)) % alignment +} + +fn get_format_size(format_char: char, num_opt: Option) -> Option { + match format_char { + 'b' | 'B' | 'x' => Some(1), + 'h' | 'H' => Some(mem::size_of::()), + 'l' | 'L' => Some(mem::size_of::()), + 'j' => Some(mem::size_of::()), + 'J' => Some(mem::size_of::()), + 'T' => Some(mem::size_of::()), + 'i' | 'I' => num_opt.or(Some(mem::size_of::())), + 'f' => Some(mem::size_of::()), + 'd' | 'n' => Some(mem::size_of::()), + 'c' => num_opt, + 'z' => None, + 's' => None, + _ => None, + } +} + fn sub(string: &[u8], i: i64, j: Option) -> Result<&[u8], std::num::TryFromIntError> { let i = match i { i if i > 0 => i.saturating_sub(1).try_into()?, diff --git a/src/stdlib/string/pack.rs b/src/stdlib/string/pack.rs new file mode 100644 index 00000000..8f1e8069 --- /dev/null +++ b/src/stdlib/string/pack.rs @@ -0,0 +1,495 @@ +use std::{ + io::{self, Cursor, Write}, + mem, +}; + +use crate::{Context, Error, IntoValue, Stack}; + +use super::{calculate_padding, get_format_size, parse_optional_int, Endianness, FormatState}; + +pub fn process<'gc>( + fmt: &str, + ctx: Context<'gc>, + stack: &Stack<'gc, '_>, +) -> Result, Error<'gc>> { + let mut state = FormatState::default(); + let mut writer = Cursor::new(Vec::new()); + let mut current_argument_index = 0; + + let mut chars = fmt.chars().peekable(); + + while let Some(format_char) = chars.next() { + let current_writer_position = writer.position() as usize; + let num_opt = parse_optional_int(&mut chars, 16) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + + match format_char { + '<' => state.endianness = Endianness::Little, + '>' => state.endianness = Endianness::Big, + '=' => state.endianness = Endianness::Native, + '!' => { + let n = num_opt.ok_or_else(|| { + Error::from_value("missing number for '!' option".into_value(ctx)) + })?; + if n < 1 || n > 16 || (n & (n - 1)) != 0 { + return Err(format!( + "alignment option '!' requires a power of 2 between 1 and 16 (got {})", + n + ) + .into_value(ctx) + .into()); + } + state.max_alignment = n; + } + ' ' => {} + 'x' => { + write_padding(&mut writer, 1)?; + } + 'X' => { + let mut chars_peek = chars.clone(); + let align_char = chars_peek.next().ok_or_else(|| { + Error::from_value("'X' must be followed by an option character".into_value(ctx)) + })?; + let align_num_opt = parse_optional_int(&mut chars_peek, 16) + .map_err(|err| Into::::into(err.into_value(ctx)))?; + + let data_size_for_align = match align_char { + 's' => { + let len_size = align_num_opt.unwrap_or(mem::size_of::()); + if !(1..=16).contains(&len_size) { + return Err(Error::from_value( + "size for 's' in X must be 1-16".into_value(ctx), + )); + } + Some(len_size) + } + 'c' | 'z' => Some(0), + _ => get_format_size(align_char, align_num_opt), + }; + + let data_size = data_size_for_align.ok_or_else(|| { + Error::from_value( + format!("invalid option '{}' following 'X'", align_char).into_value(ctx), + ) + })?; + + let padding = + calculate_padding(current_writer_position, data_size, state.max_alignment); + write_padding(&mut writer, padding)?; + } + op @ ('b' | 'B' | 'h' | 'H' | 'l' | 'L' | 'j' | 'J' | 'T' | 'i' | 'I' | 'f' | 'd' + | 'n') => { + check_pack_arg(ctx, stack.len(), current_argument_index, op)?; + let arg_val = stack.get(current_argument_index); + current_argument_index += 1; + + let data_size = get_format_size(op, num_opt).unwrap(); + let padding = + calculate_padding(current_writer_position, data_size, state.max_alignment); + write_padding(&mut writer, padding)?; + + match op { + 'b' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + write_int_n(&mut writer, val, 1, &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'B' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, 1, &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'h' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + write_int_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'H' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'l' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + write_int_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'L' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'j' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + write_int_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'J' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'T' => { + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, mem::size_of::(), &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'i' => { + let size = num_opt.unwrap_or(mem::size_of::()); + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + write_int_n(&mut writer, val, size, &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'I' => { + let size = num_opt.unwrap_or(mem::size_of::()); + let val = arg_val.to_integer().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be an `integer`", op) + .into_value(ctx), + ) + })?; + if val < 0 { + return Err(Error::from_value( + format!( + "negative value {} provided for unsigned format '{}'", + val, op + ) + .into_value(ctx), + )); + } + write_uint_n(&mut writer, val, size, &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + } + 'f' => { + let val = arg_val.to_number().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be a `number`", op) + .into_value(ctx), + ) + })?; + write_float(&mut writer, val as f32, &state) + .map_err(|e| Error::from_value(e.to_string().into_value(ctx)))?; + } + 'd' => { + let val = arg_val.to_number().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be a `number`", op) + .into_value(ctx), + ) + })?; + write_double(&mut writer, val, &state) + .map_err(|e| Error::from_value(e.to_string().into_value(ctx)))?; + } + 'n' => { + let val = arg_val.to_number().ok_or_else(|| { + Error::from_value( + format!("argument for format '{}' must be a `number`", op) + .into_value(ctx), + ) + })?; + write_double(&mut writer, val, &state) + .map_err(|e| Error::from_value(e.to_string().into_value(ctx)))?; + } + _ => unreachable!(), + } + } + 'c' => { + let n = num_opt.ok_or_else(|| { + Into::::into("missing number for 'c' option".into_value(ctx)) + })?; + check_pack_arg(ctx, stack.len(), current_argument_index, 'c')?; + let arg_val = stack.get(current_argument_index); + current_argument_index += 1; + + let s = arg_val.into_string(ctx).ok_or_else(|| { + Error::from_value("argument for format 'c' must be a `string`".into_value(ctx)) + })?; + let bytes = s.as_bytes(); + + if bytes.len() >= n { + writer.write_all(&bytes[..n])? + } else { + writer.write_all(bytes)?; + write_padding(&mut writer, n - bytes.len())?; + } + } + 'z' => { + check_pack_arg(ctx, stack.len(), current_argument_index, 'z')?; + let arg_val = stack.get(current_argument_index); + current_argument_index += 1; + + let s = arg_val.into_string(ctx).ok_or_else(|| { + Error::from_value("argument for format 'z' must be a `string`".into_value(ctx)) + })?; + let bytes = s.as_bytes(); + + writer.write_all(bytes)?; + writer.write_all(&[0])? + } + 's' => { + let len_size = num_opt.unwrap_or(mem::size_of::()); + if len_size < 1 || len_size > 16 { + return Err(Error::from_value( + "string length size must be between 1 and 16 bytes".into_value(ctx), + )); + } + + check_pack_arg(ctx, stack.len(), current_argument_index, 's')?; + let arg_val = stack.get(current_argument_index); + current_argument_index += 1; + + let s = arg_val.into_string(ctx).ok_or_else(|| { + Error::from_value("argument for format 's' must be a `string`".into_value(ctx)) + })?; + let bytes = s.as_bytes(); + let str_len = bytes.len() as u64; + + let padding = + calculate_padding(current_writer_position, len_size, state.max_alignment); + write_padding(&mut writer, padding)?; + let str_len_i64 = i64::try_from(str_len).map_err(|_| { + Error::from_value("string length too large to represent as i64".into_value(ctx)) + })?; + write_uint_n(&mut writer, str_len_i64, len_size, &state) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + + writer.write_all(bytes)? + } + invalid => { + return Err(Error::from_value( + format!("invalid conversion option '{}' in format string", invalid) + .into_value(ctx), + )); + } + } + } + + Ok(writer.into_inner()) +} + +fn check_pack_arg(ctx: Context, stack_len: usize, index: usize, op: char) -> Result<(), Error> { + if index >= stack_len { + Err(format!("missing argument for format '{}'", op) + .into_value(ctx) + .into()) + } else { + Ok(()) + } +} + +fn write_padding(writer: &mut impl Write, padding: usize) -> Result<(), std::io::Error> { + for _ in 0..padding { + writer.write_all(&[0])? + } + Ok(()) +} + +fn write_int_n( + writer: &mut W, + value: i64, + size: usize, + state: &FormatState, +) -> Result<(), std::string::String> { + if !(1..=16).contains(&size) { + return Err("integer size must be between 1 and 16".to_string()); + } + + let min_val = -(1i128 << (size * 8 - 1)); + let max_val = (1i128 << (size * 8 - 1)) - 1; + + if (value as i128) < min_val || (value as i128) > max_val { + return Err(format!( + "integer {} does not fit in {} signed bytes", + value, size + )); + } + + let mut bytes = [0u8; 16]; + let src_bytes = value.to_ne_bytes(); + + bytes[..8].copy_from_slice(&src_bytes); + + if size > 8 { + let sign_byte = if value < 0 { 0xff } else { 0x00 }; + for i in 8..size { + bytes[i] = sign_byte; + } + } + + write_bytes_endian(writer, &bytes[..size], state.endianness).map_err(|e| e.to_string()) +} + +fn write_uint_n( + writer: &mut W, + value: i64, + size: usize, + state: &FormatState, +) -> Result<(), std::string::String> { + if !(1..=16).contains(&size) { + return Err("integer size must be between 1 and 16".to_string()); + } + if value < 0 { + return Err(format!( + "negative value {} provided for unsigned format", + value + )); + } + let u_value = value as u64; + + let max_val = if size == 16 { + u128::MAX + } else { + (1u128 << (size * 8)) - 1 + }; + + if (u_value as u128) > max_val { + return Err(format!( + "unsigned integer {} does not fit in {} bytes", + u_value, size + )); + } + + let mut bytes = [0u8; 16]; + let src_bytes = u_value.to_ne_bytes(); + + bytes[..8].copy_from_slice(&src_bytes); + + write_bytes_endian(writer, &bytes[..size], state.endianness).map_err(|e| e.to_string()) +} + +fn write_bytes_endian( + writer: &mut W, + bytes_to_write: &[u8], + endianness: Endianness, +) -> io::Result<()> { + match endianness { + Endianness::Little => { + if cfg!(target_endian = "little") { + writer.write_all(bytes_to_write) + } else { + writer.write_all(&bytes_to_write.iter().rev().copied().collect::>()) + } + } + Endianness::Big => { + if cfg!(target_endian = "big") { + writer.write_all(bytes_to_write) + } else { + writer.write_all(&bytes_to_write.iter().rev().copied().collect::>()) + } + } + Endianness::Native => writer.write_all(bytes_to_write), + } +} + +fn write_float(writer: &mut W, value: f32, state: &FormatState) -> io::Result<()> { + let bytes = match state.endianness { + Endianness::Little => value.to_le_bytes(), + Endianness::Big => value.to_be_bytes(), + Endianness::Native => value.to_ne_bytes(), + }; + writer.write_all(&bytes) +} + +fn write_double(writer: &mut W, value: f64, state: &FormatState) -> io::Result<()> { + let bytes = match state.endianness { + Endianness::Little => value.to_le_bytes(), + Endianness::Big => value.to_be_bytes(), + Endianness::Native => value.to_ne_bytes(), + }; + writer.write_all(&bytes) +} diff --git a/src/stdlib/string/packsize.rs b/src/stdlib/string/packsize.rs new file mode 100644 index 00000000..1d4048e5 --- /dev/null +++ b/src/stdlib/string/packsize.rs @@ -0,0 +1,115 @@ +use super::{calculate_padding, get_format_size, parse_optional_int, Endianness, FormatState}; +use crate::{Context, Error, IntoValue}; +use std::mem; + +pub fn process<'gc>(fmt: &str, ctx: Context<'gc>) -> Result> { + let mut state = FormatState::default(); + let mut total_size: usize = 0; + let mut current_offset: usize = 0; + let mut chars = fmt.chars().peekable(); + + while let Some(format_char) = chars.next() { + let num_opt = parse_optional_int(&mut chars, 16) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + + match format_char { + '<' => state.endianness = Endianness::Little, + '>' => state.endianness = Endianness::Big, + '=' => state.endianness = Endianness::Native, + '!' => { + let n = num_opt.ok_or_else(|| { + Error::from_value("missing number for '!' option".into_value(ctx)) + })?; + if !n.is_power_of_two() || n > 16 { + return Err(format!( + "alignment option '!' requires a power of 2 between 1 and 16 (got {})", + n + ) + .into_value(ctx) + .into()); + } + state.max_alignment = n; + } + ' ' => {} + 'x' => { + total_size += 1; + current_offset += 1; + } + 'X' => { + let mut chars_peek = chars.clone(); + let align_char = chars_peek.next().ok_or_else(|| { + Error::from_value("'X' must be followed by an option character".into_value(ctx)) + })?; + let align_num_opt = parse_optional_int(&mut chars_peek, 16) + .map_err(|err| Into::::into(err.into_value(ctx)))?; + + let data_size_for_align = match align_char { + 's' => { + let len_size = align_num_opt.unwrap_or(mem::size_of::()); + if !(1..=16).contains(&len_size) { + return Err(Error::from_value( + "size for 's' in X must be 1-16".into_value(ctx), + )); + } + Some(len_size) + } + 'c' | 'z' => Some(0), + _ => get_format_size(align_char, align_num_opt), + }; + + let data_size = data_size_for_align.ok_or_else(|| { + Error::from_value( + format!("invalid option '{}' following 'X'", align_char).into_value(ctx), + ) + })?; + + let padding = calculate_padding(current_offset, data_size, state.max_alignment); + total_size += padding; + current_offset += padding; + } + op @ ('b' | 'B' | 'h' | 'H' | 'l' | 'L' | 'j' | 'J' | 'T' | 'i' | 'I' | 'f' | 'd' + | 'n') => { + let data_size = get_format_size(op, num_opt).ok_or_else(|| { + Error::from_value( + format!("internal error getting size for '{}'", op).into_value(ctx), + ) + })?; + + let padding = calculate_padding(current_offset, data_size, state.max_alignment); + total_size += padding + data_size; + current_offset += padding + data_size; + } + 'c' => { + let n = num_opt.ok_or_else(|| { + Into::::into("missing number for 'c' option".into_value(ctx)) + })?; + total_size += n; + current_offset += n; + } + 'z' => { + return Err(Error::from_value( + "variable-length format ('z')".into_value(ctx), + )); + } + 's' => { + let len_size = num_opt.unwrap_or(mem::size_of::()); + if len_size < 1 || len_size > 16 { + return Err(Error::from_value( + "string length size must be between 1 and 16 bytes".into_value(ctx), + )); + } + let padding = calculate_padding(current_offset, len_size, state.max_alignment); + total_size += padding + len_size; + current_offset += padding + len_size; + } + invalid => { + return Err(Error::from_value( + format!("invalid conversion option '{}' in format string", invalid) + .into_value(ctx), + )); + } + } + } + + Ok(total_size) +} diff --git a/src/stdlib/string/unpack.rs b/src/stdlib/string/unpack.rs new file mode 100644 index 00000000..42799161 --- /dev/null +++ b/src/stdlib/string/unpack.rs @@ -0,0 +1,432 @@ +use std::{io::Cursor, mem}; + +use crate::{Context, Error, IntoValue, Value}; + +use super::{calculate_padding, get_format_size, parse_optional_int, Endianness, FormatState}; + +pub fn process<'gc>( + fmt: &str, + bytes: &[u8], + start_pos: u64, + ctx: Context<'gc>, +) -> Result<(Vec>, u64), Error<'gc>> { + let mut reader = Cursor::new(bytes); + reader.set_position(start_pos as u64); + + let mut state = FormatState::default(); + let mut values: Vec = Vec::new(); + let mut chars = fmt.chars().peekable(); + + while let Some(format_char) = chars.next() { + let initial_read_pos = reader.position() as usize; + let num_opt = parse_optional_int(&mut chars, 16) + .map_err(|err| Error::from_value(err.into_value(ctx)))?; + + match format_char { + '<' => state.endianness = Endianness::Little, + '>' => state.endianness = Endianness::Big, + '=' => state.endianness = Endianness::Native, + '!' => { + let n = num_opt.ok_or_else(|| { + Error::from_value("missing number for '!' option".into_value(ctx)) + })?; + if !n.is_power_of_two() || n > 16 { + return Err(format!( + "alignment option '!' requires a power of 2 between 1 and 16 (got {})", + n + ) + .into_value(ctx) + .into()); + } + state.max_alignment = n; + } + ' ' => {} + 'x' => { + read_exact_bytes(&mut reader, 1, 'x', ctx)?; + } + 'X' => { + let mut chars_peek = chars.clone(); + let align_char = chars_peek.next().ok_or_else(|| { + Error::from_value("'X' must be followed by an option character".into_value(ctx)) + })?; + let align_num_opt = parse_optional_int(&mut chars_peek, 16) + .map_err(|err| Into::::into(err.into_value(ctx)))?; + + let data_size_for_align = match align_char { + 's' => { + let len_size = align_num_opt.unwrap_or(mem::size_of::()); + if !(1..=16).contains(&len_size) { + return Err(Error::from_value( + "size for 's' in X must be 1-16".into_value(ctx), + )); + } + Some(len_size) + } + 'c' | 'z' => Some(0), + _ => get_format_size(align_char, align_num_opt), + }; + + let data_size = data_size_for_align.ok_or_else(|| { + Error::from_value( + format!("invalid option '{}' following 'X'", align_char).into_value(ctx), + ) + })?; + + let padding = calculate_padding(initial_read_pos, data_size, state.max_alignment); + read_padding(&mut reader, padding, ctx)?; + } + op @ ('b' | 'B' | 'h' | 'H' | 'l' | 'L' | 'j' | 'J' | 'T' | 'i' | 'I' | 'f' | 'd' + | 'n') => { + let data_size = get_format_size(op, num_opt).ok_or_else(|| { + // Should not happen for these options + Error::from_value( + format!("internal error getting size for '{}'", op).into_value(ctx), + ) + })?; + + let padding = calculate_padding(initial_read_pos, data_size, state.max_alignment); + read_padding(&mut reader, padding, ctx)?; + + let value = match op { + 'b' => { + let val128 = read_int_n(&mut reader, 1, &state, op, ctx)?; + (val128 as i64).into_value(ctx) + } + 'B' => { + let val128 = read_uint_n(&mut reader, 1, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value( + format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'h' => { + let size = mem::size_of::(); + let val128 = read_int_n(&mut reader, size, &state, op, ctx)?; + (val128 as i64).into_value(ctx) + } + 'H' => { + let size = mem::size_of::(); + let val128 = read_uint_n(&mut reader, size, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value( + format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'l' => { + let size = mem::size_of::(); + let val128 = read_int_n(&mut reader, size, &state, op, ctx)?; + (val128 as i64).into_value(ctx) + } + 'L' => { + let size = mem::size_of::(); + let val128 = read_uint_n(&mut reader, size, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value( + format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'j' => { + let size = mem::size_of::(); + let val128 = read_int_n(&mut reader, size, &state, op, ctx)?; + (val128 as i64).into_value(ctx) + } + 'J' => { + let size = mem::size_of::(); + let val128 = read_uint_n(&mut reader, size, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value( + format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'T' => { + let size = mem::size_of::(); + let val128 = read_uint_n(&mut reader, size, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value( + format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'i' => { + let size = num_opt.unwrap_or(mem::size_of::()); + let val128 = read_int_n(&mut reader, size, &state, op, ctx)?; + if val128 < i64::MIN as i128 || val128 > i64::MAX as i128 { + return Err(Error::from_value( + format!( + "integer value {} read for format '{}' does not fit in `integer`", + val128, op + ) + .into_value(ctx), + )); + } + (val128 as i64).into_value(ctx) + } + 'I' => { + let size = num_opt.unwrap_or(mem::size_of::()); + let val128 = read_uint_n(&mut reader, size, &state, op, ctx)?; + if val128 > i64::MAX as u128 { + return Err(Error::from_value(format!( + "unsigned value {} read for format '{}' does not fit in `integer`", + val128, op + ).into_value(ctx))); + } + (val128 as i64).into_value(ctx) + } + 'f' => read_float(&mut reader, &state, ctx)?.into_value(ctx), + 'd' => read_double(&mut reader, &state, op, ctx)?.into_value(ctx), + 'n' => read_double(&mut reader, &state, op, ctx)?.into_value(ctx), + _ => unreachable!(), + }; + values.push(value); + } + 'c' => { + let n = num_opt.ok_or_else(|| { + Into::::into("missing number for 'c' option".into_value(ctx)) + })?; + let bytes = read_exact_bytes(&mut reader, n, 'c', ctx)?; + values.push(ctx.intern(bytes).into_value(ctx)); + } + 'z' => { + let buffer = reader.get_ref(); + let current_pos = reader.position() as usize; + let remaining_bytes = &buffer[current_pos..]; + let null_pos = remaining_bytes.iter().position(|&b| b == 0); + + match null_pos { + Some(pos) => { + let str_bytes = &remaining_bytes[..pos]; + values.push(ctx.intern(str_bytes).into_value(ctx)); + reader.set_position((current_pos + pos + 1) as u64); + } + None => { + return Err(Error::from_value( + "missing null terminator for 'z' format".into_value(ctx), + )); + } + } + } + 's' => { + let len_size = num_opt.unwrap_or(mem::size_of::()); + if len_size < 1 || len_size > 16 { + return Err(Error::from_value( + "string length size must be between 1 and 16 bytes".into_value(ctx), + )); + } + + let padding = calculate_padding(initial_read_pos, len_size, state.max_alignment); + read_padding(&mut reader, padding, ctx)?; + + let str_len_u128 = read_uint_n(&mut reader, len_size, &state, 's', ctx)?; + let str_len = usize::try_from(str_len_u128).map_err(|_| { + Error::from_value("string length too large for usize".into_value(ctx)) + })?; + if str_len_u128 > i64::MAX as u128 { + return Err(Error::from_value( + "string length value does not fit in `integer`".into_value(ctx), + )); + } + + let str_bytes = read_exact_bytes(&mut reader, str_len, 's', ctx)?; + values.push(ctx.intern(str_bytes).into_value(ctx)); + } + invalid => { + return Err(Error::from_value( + format!("invalid conversion option '{}' in format string", invalid) + .into_value(ctx), + )); + } + } + } + + Ok((values, reader.position().wrapping_add(1))) +} + +fn read_exact_bytes<'a, 'gc>( + reader: &mut Cursor<&'a [u8]>, + count: usize, + op: char, + ctx: Context<'gc>, +) -> Result<&'a [u8], Error<'gc>> { + let start = reader.position() as usize; + if start + count > reader.get_ref().len() { + return Err(Error::from_value( + format!("data string too short for format '{}'", op).into_value(ctx), + )); + } + reader.set_position((start + count) as u64); + Ok(&reader.get_ref()[start..start + count]) +} + +fn read_padding<'gc>( + reader: &mut Cursor<&[u8]>, + padding: usize, + ctx: Context<'gc>, +) -> Result<(), Error<'gc>> { + if padding > 0 { + read_exact_bytes(reader, padding, 'X', ctx)?; + } + Ok(()) +} + +fn read_int_n<'gc>( + reader: &mut Cursor<&[u8]>, + size: usize, + state: &FormatState, + op: char, + ctx: Context<'gc>, +) -> Result> { + if !(1..=16).contains(&size) { + return Err(Error::from_value( + "integer size must be between 1 and 16".into_value(ctx), + )); + } + let read_bytes = read_exact_bytes(reader, size, op, ctx)?; + let mut bytes = [0u8; 16]; + + match state.endianness { + Endianness::Little => { + if cfg!(target_endian = "little") { + bytes[..size].copy_from_slice(read_bytes); + } else { + for (i, byte) in read_bytes.iter().rev().enumerate() { + if i < size { + bytes[i] = *byte; + } + } + } + } + Endianness::Big => { + if cfg!(target_endian = "big") { + bytes[..size].copy_from_slice(read_bytes); + } else { + for (i, byte) in read_bytes.iter().rev().enumerate() { + if i < size { + bytes[i] = *byte; + } + } + } + } + Endianness::Native => { + bytes[..size].copy_from_slice(read_bytes); + } + } + + let mut value = i128::from_ne_bytes(bytes); + + if size < 16 { + let shift = 128 - (size * 8); + value = (value << shift) >> shift; + } + + Ok(value) +} + +fn read_uint_n<'gc>( + reader: &mut Cursor<&[u8]>, + size: usize, + state: &FormatState, + op: char, + ctx: Context<'gc>, +) -> Result> { + if !(1..=16).contains(&size) { + return Err(Error::from_value( + "integer size must be between 1 and 16".into_value(ctx), + )); + } + let read_bytes = read_exact_bytes(reader, size, op, ctx)?; + let mut bytes = [0u8; 16]; + + match state.endianness { + Endianness::Little => { + if cfg!(target_endian = "little") { + bytes[..size].copy_from_slice(read_bytes); + } else { + for (i, byte) in read_bytes.iter().rev().enumerate() { + if i < size { + bytes[i] = *byte; + } + } + } + } + Endianness::Big => { + if cfg!(target_endian = "big") { + bytes[..size].copy_from_slice(read_bytes); + } else { + for (i, byte) in read_bytes.iter().rev().enumerate() { + if i < size { + bytes[i] = *byte; + } + } + } + } + Endianness::Native => { + bytes[..size].copy_from_slice(read_bytes); + } + } + + let value = u128::from_ne_bytes(bytes); + + Ok(value) +} + +fn read_float<'gc>( + reader: &mut Cursor<&[u8]>, + state: &FormatState, + ctx: Context<'gc>, +) -> Result> { + let bytes = read_exact_bytes(reader, mem::size_of::(), 'f', ctx)?; + let arr: [u8; 4] = bytes + .try_into() + .map_err(|_| Error::from_value("internal error: float size mismatch".into_value(ctx)))?; + Ok(match state.endianness { + Endianness::Little => f32::from_le_bytes(arr), + Endianness::Big => f32::from_be_bytes(arr), + Endianness::Native => f32::from_ne_bytes(arr), + }) +} + +fn read_double<'gc>( + reader: &mut Cursor<&[u8]>, + state: &FormatState, + op: char, + ctx: Context<'gc>, +) -> Result> { + let bytes = read_exact_bytes(reader, mem::size_of::(), op, ctx)?; + let arr: [u8; 8] = bytes + .try_into() + .map_err(|_| Error::from_value("internal error: double size mismatch".into_value(ctx)))?; + Ok(match state.endianness { + Endianness::Little => f64::from_le_bytes(arr), + Endianness::Big => f64::from_be_bytes(arr), + Endianness::Native => f64::from_ne_bytes(arr), + }) +} From 8d9cbdc59050802fb38dec00feccdd62462eacb4 Mon Sep 17 00:00:00 2001 From: lyranowl Date: Sun, 10 Aug 2025 22:02:06 +0500 Subject: [PATCH 2/3] update COMPABILITY.md --- COMPATIBILITY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/COMPATIBILITY.md b/COMPATIBILITY.md index cdb6108f..e301168c 100644 --- a/COMPATIBILITY.md +++ b/COMPATIBILITY.md @@ -99,12 +99,12 @@ likely not be implemented due to differences between piccolo and PUC-Lua. | 🔵 | `len(s)` | | | | 🔵 | `lower(s)` | | | | ⚫️️ | `match(s, pattern[, init])` | | | -| ⚫️️ | `pack(fmt, values...)` | | | -| ⚫️️ | `packsize(fmt)` | | | +| 🔵 | `pack(fmt, values...)` | | | +| 🔵 | `packsize(fmt)` | | | | ⚫️️ | `rep(s, n[, sep])` | | | | 🔵 | `reverse(s)` | | | | 🔵 | `sub(s, i[, j])` | | | -| ⚫️️ | `unpack(fmt, s[, pos])` | | | +| 🔵 | `unpack(fmt, s[, pos])` | | | | 🔵 | `upper(s)` | | | ## UTF8 From 150507e193a75b7e64ba74a89b3836865010eaac Mon Sep 17 00:00:00 2001 From: lyranowl Date: Sun, 10 Aug 2025 22:15:04 +0500 Subject: [PATCH 3/3] remove `string.rep` --- src/stdlib/string.rs | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/stdlib/string.rs b/src/stdlib/string.rs index 722a1f4f..2a1bd2f7 100644 --- a/src/stdlib/string.rs +++ b/src/stdlib/string.rs @@ -164,49 +164,6 @@ pub fn load_string<'gc>(ctx: Context<'gc>) { }), ); - string.set_field( - ctx, - "rep", - Callback::from_fn(&ctx, |ctx, _, mut stack| { - let (s, n, sep) = stack.consume::<(String, i64, Option)>(ctx)?; - - if n <= 0 { - stack.replace(ctx, ctx.intern(b"")); - return Ok(CallbackReturn::Return); - } - - if n == 1 { - stack.replace(ctx, s); - return Ok(CallbackReturn::Return); - } - - let s = s.as_bytes(); - let n = n as usize; - let sep = sep.map(|s| s.as_bytes()).unwrap_or(b""); - - let s_total_len = s.len().checked_mul(n); - let sep_total_len = sep.len().checked_mul(n - 1); - - let required_cap = match (s_total_len, sep_total_len) { - (Some(s_total), Some(sep_total)) => s_total.checked_add(sep_total), - _ => None, - }; - - let capacity = required_cap - .ok_or_else(|| Error::from_value("resulting string too large".into_value(ctx)))?; - - let mut result = Vec::with_capacity(capacity); - result.extend_from_slice(s); - for _ in 1..n { - result.extend_from_slice(sep); - result.extend_from_slice(s); - } - - stack.replace(ctx, ctx.intern(&result)); - Ok(CallbackReturn::Return) - }), - ); - ctx.set_global("string", string); }