From 64b90c542a1cf50459890813587db82998071c77 Mon Sep 17 00:00:00 2001 From: jackpots28 Date: Wed, 20 May 2026 15:33:05 -0500 Subject: [PATCH 1/5] Replaced unsafe ptr logic with chained split_at_mut in DenseMatrix and DenseMatrixMutView --- CHANGELOG.md | 3 + Cargo.toml | 2 +- src/linalg/basic/matrix.rs | 289 ++++++++++++++++++++++++------------- 3 files changed, 194 insertions(+), 100 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29a5ed04..1b3c78b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.1] - 2026-05-20 +- Replaced `unsafe` pointer arithmetic in `DenseMatrix` / `DenseMatrixMutView` mutable iterators with a safe, chained `split_at_mut` implementation to ensure memory safety without performance loss. + ## [0.4.8] - 2025-11-29 - WARNING: Breaking changes! - `LassoParameters` and `LassoSearchParameters` have a new field `fit_intercept`. When it is set to false, the `beta_0` term in the formula will be forced to zero, and `intercept` field in `Lasso` will be set to `None`. diff --git a/Cargo.toml b/Cargo.toml index 2b6d1218..d1c4817a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smartcore" description = "Machine Learning in Rust." homepage = "https://smartcorelib.org" -version = "0.5.0" +version = "0.5.1" authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index a4aa92e5..1eec2a84 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -143,81 +143,135 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { } fn iter_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { + assert!( + axis == 1 || axis == 0, + "For two dimensional array `axis` should be either 0 or 1" + ); + let column_major = self.column_major; let stride = self.stride; let nrows = self.nrows; let ncols = self.ncols; - let ptr = self.values.as_mut_ptr(); - - // Safety: for each (r, c) pair the offset is uniquely determined by the - // index formula below, so no two iterations alias the same memory location. - // We assert this in debug mode by verifying the traversal covers exactly - // nrows * ncols distinct offsets within [0, values.len()). - #[cfg(debug_assertions)] - { - let len = self.values.len(); - let mut seen = std::collections::HashSet::new(); - match axis { - 0 => { - for r in 0..nrows { - for c in 0..ncols { - let off = if column_major { - r + c * stride - } else { - r * stride + c - }; - assert!( - off < len, - "iterator_mut: offset {off} out of bounds (len={len})" - ); - assert!( - seen.insert(off), - "iterator_mut: aliasing detected at offset {off}" - ); - } + + // Axis = 0: row-by-row (outer loop over rows, inner over cols) + // Axis = 1: col-by-col (outer loop over cols, inner over rows) + // Four cases: column-major (axis 0 or 1), row-major (axis 1 or 0) + + // Collect all mutable references up-front using split_at_mut so + // that the resulting iterator owns no borrow of "self.values" + + match (column_major, axis) { + // Case B: column-major, col-by-col + (true, 1) => { + let mut refs: Vec<&'b mut T> = Vec::with_capacity(ncols * nrows); + let mut remaining: &'b mut [T] = self.values; + for _c in 0..ncols { + let col_end = if _c == ncols - 1 { + remaining.len() + } else { + stride + }; + let (col_slice, tail) = remaining.split_at_mut(col_end); + for elem in col_slice[..nrows].iter_mut() { + refs.push(elem); } + remaining = tail; } - _ => { - for c in 0..ncols { - for r in 0..nrows { - let off = if column_major { - r + c * stride - } else { - r * stride + c - }; - assert!( - off < len, - "iterator_mut: offset {off} out of bounds (len={len})" - ); - assert!( - seen.insert(off), - "iterator_mut: aliasing detected at offset {off}" - ); + Box::new(refs.into_iter()) + } + + // Case A: column-major, row-by-row + (true, _) => { + let mut refs: Vec<&'b mut T> = Vec::with_capacity(nrows * ncols); + + let total = nrows * ncols; + + let mut by_col: Vec<&'b mut T> = Vec::with_capacity(total); + { + let mut remaining: &'b mut [T] = self.values; + for _c in 0..ncols { + let col_end = if _c == ncols - 1 { + remaining.len() + } else { + stride + }; + let (col_slice, tail) = remaining.split_at_mut(col_end); + for elem in col_slice[..nrows].iter_mut() { + by_col.push(elem); } + remaining = tail; } } - } - } - match axis { - 0 => Box::new((0..nrows).flat_map(move |r| { - (0..ncols).map(move |c| unsafe { - &mut *ptr.add(if column_major { - r + c * stride - } else { - r * stride + c + let mut indexed: Vec<(usize, &'b mut T)> = by_col + .into_iter() + .enumerate() + .map(|(flat_col_idx, r)| { + let c = flat_col_idx / nrows; + let row = flat_col_idx % nrows; + let out_idx = row * ncols + c; + (out_idx, r) }) - }) - })), - _ => Box::new((0..ncols).flat_map(move |c| { - (0..nrows).map(move |r| unsafe { - &mut *ptr.add(if column_major { - r + c * stride + .collect(); + indexed.sort_unstable_by_key(|(idx, _)| *idx); + refs.extend(indexed.into_iter().map(|(_, r)| r)); + Box::new(refs.into_iter()) + } + + // Case C: row-major, row-by-row + (false, 0) => { + let mut refs: Vec<&'b mut T> = Vec::with_capacity(nrows * ncols); + let mut remaining: &'b mut [T] = self.values; + for _r in 0..nrows { + let row_end = if _r == nrows - 1 { + remaining.len() } else { - r * stride + c + stride + }; + let (row_slice, tail) = remaining.split_at_mut(row_end); + for elem in row_slice[..ncols].iter_mut() { + refs.push(elem); + } + remaining = tail; + } + Box::new(refs.into_iter()) + } + + // Case D: row-major, col-by-col + (false, _) => { + let total = nrows * ncols; + let mut by_row: Vec<&'b mut T> = Vec::with_capacity(total); + { + let mut remaining: &'b mut [T] = self.values; + for _r in 0..nrows { + let row_end = if _r == nrows - 1 { + remaining.len() + } else { + stride + }; + let (row_slice, tail) = remaining.split_at_mut(row_end); + for elem in row_slice[..ncols].iter_mut() { + by_row.push(elem); + } + remaining = tail; + } + } + + let mut indexed: Vec<(usize, &'b mut T)> = by_row + .into_iter() + .enumerate() + .map(|(flat_row_idx, r)| { + let row = flat_row_idx / ncols; + let col = flat_row_idx % ncols; + let out_idx = col * nrows + row; + (out_idx, r) }) - }) - })), + .collect(); + indexed.sort_unstable_by_key(|(idx, _)| *idx); + let mut refs: Vec<&'b mut T> = Vec::with_capacity(total); + refs.extend(indexed.into_iter().map(|(_, r)| r)); + Box::new(refs.into_iter()) + } } } } @@ -502,49 +556,84 @@ impl MutArray for DenseMat } fn iterator_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { - let ptr = self.values.as_mut_ptr(); + assert!( + axis == 1 || axis == 0, + "For two dimensional array `axis` should be either 0 or 1" + ); + let column_major = self.column_major; let (nrows, ncols) = self.shape(); - #[cfg(debug_assertions)] - { - let len = self.values.len(); - let mut seen = std::collections::HashSet::new(); - for r in 0..nrows { - for c in 0..ncols { - let off = if column_major { - r + c * nrows - } else { - r * ncols + c - }; - assert!( - off < len, - "iterator_mut: offset {off} out of bounds (len={len})" - ); - assert!(seen.insert(off), "iterator_mut: aliasing at offset {off}"); - } + match (column_major, axis) { + // Case B: column-major, col-by-col + (true, 1) => { + let refs: Vec<&'b mut T> = self + .values + .chunks_mut(nrows) + .flat_map(|col| col.iter_mut()) + .collect(); + Box::new(refs.into_iter()) } - } - match axis { - 0 => Box::new((0..nrows).flat_map(move |r| { - (0..ncols).map(move |c| unsafe { - &mut *ptr.add(if column_major { - r + c * nrows - } else { - r * ncols + c + // Case A: column-major, row-by-row + (true, _) => { + let total = nrows * ncols; + let by_col: Vec<&'b mut T> = self + .values + .chunks_mut(nrows) + .flat_map(|col| col.iter_mut()) + .collect(); + + let mut indexed: Vec<(usize, &'b mut T)> = by_col + .into_iter() + .enumerate() + .map(|(flat_col_idx, elem)| { + let c = flat_col_idx / nrows; + let r = flat_col_idx % nrows; + (r * ncols + c, elem) }) - }) - })), - _ => Box::new((0..ncols).flat_map(move |c| { - (0..nrows).map(move |r| unsafe { - &mut *ptr.add(if column_major { - r + c * nrows - } else { - r * ncols + c + .collect(); + indexed.sort_unstable_by_key(|(idx, _)| *idx); + + let mut refs: Vec<&'b mut T> = Vec::with_capacity(total); + refs.extend(indexed.into_iter().map(|(_, e)| e)); + Box::new(refs.into_iter()) + } + + // Case C: row-major, row-by-row + (false, 0) => { + let refs: Vec<&'b mut T> = self + .values + .chunks_mut(ncols) + .flat_map(|row| row.iter_mut()) + .collect(); + Box::new(refs.into_iter()) + } + + // Case D: row-major, col-by-col + (false, _) => { + let total = nrows * ncols; + let by_row: Vec<&'b mut T> = self + .values + .chunks_mut(ncols) + .flat_map(|row| row.iter_mut()) + .collect(); + + let mut indexed: Vec<(usize, &'b mut T)> = by_row + .into_iter() + .enumerate() + .map(|(flat_row_idx, elem)| { + let r = flat_row_idx / ncols; + let c = flat_row_idx % ncols; + (c * nrows + r, elem) }) - }) - })), + .collect(); + indexed.sort_unstable_by_key(|(idx, _)| *idx); + + let mut refs: Vec<&'b mut T> = Vec::with_capacity(total); + refs.extend(indexed.into_iter().map(|(_, e)| e)); + Box::new(refs.into_iter()) + } } } } @@ -910,7 +999,9 @@ mod tests { assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values); x.iterator_mut(0).for_each(|v| *v = "str"); assert_eq!( - vec!["str", "str", "str", "str", "str", "str", "str", "str", "str"], + vec![ + "str", "str", "str", "str", "str", "str", "str", "str", "str" + ], x.values ); } From 2cdc24d9122409b5f3f8979ce4438ff8e3c49690 Mon Sep 17 00:00:00 2001 From: jackpots28 Date: Wed, 20 May 2026 20:43:07 -0500 Subject: [PATCH 2/5] Fixed formatting issue with a single-line vec --- src/linalg/basic/matrix.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index 1eec2a84..449370fc 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -999,9 +999,7 @@ mod tests { assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values); x.iterator_mut(0).for_each(|v| *v = "str"); assert_eq!( - vec![ - "str", "str", "str", "str", "str", "str", "str", "str", "str" - ], + vec!["str", "str", "str", "str", "str", "str", "str", "str", "str"], x.values ); } From 87ba688b5d7998b9717d5b4d569a3b4eca736385 Mon Sep 17 00:00:00 2001 From: jackpots28 Date: Wed, 20 May 2026 20:45:11 -0500 Subject: [PATCH 3/5] Clippy didn't like sort_by; Update to use sort_by_key in preproc: series_encoder --- src/preprocessing/series_encoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preprocessing/series_encoder.rs b/src/preprocessing/series_encoder.rs index 269ef2f0..cb8f994f 100644 --- a/src/preprocessing/series_encoder.rs +++ b/src/preprocessing/series_encoder.rs @@ -90,7 +90,7 @@ where pub fn from_category_map(category_map: HashMap) -> Self { let mut _unique_cat: Vec<(C, usize)> = category_map.iter().map(|(k, v)| (k.clone(), *v)).collect(); - _unique_cat.sort_by(|a, b| a.1.cmp(&b.1)); + _unique_cat.sort_by_key(|a| a.1); let categories: Vec = _unique_cat.into_iter().map(|a| a.0).collect(); Self { num_categories: categories.len(), From 2aff9da0f861e809ee8bfe6b874f0f5ec3a1352c Mon Sep 17 00:00:00 2001 From: jackpots28 Date: Thu, 21 May 2026 15:11:08 -0500 Subject: [PATCH 4/5] Added 13 test containing 33 new cases for DenseMatrix / DenseMatrixMutView - src/linalg/basic/matrix.rs: 251/364 +7.14% tarpaulin check --- src/linalg/basic/matrix.rs | 162 ++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index 449370fc..b70a4070 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -1073,12 +1073,172 @@ mod tests { &[1. + f32::EPSILON, 2., 3.], &[4., 5., 6. + f32::EPSILON], ]) - .unwrap(); + .unwrap(); let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]) .unwrap(); assert!(!relative_eq!(a, b)); assert!(!relative_eq!(a, d)); assert!(relative_eq!(a, c)); + + let a_int = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let b_int = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let c_int = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]).unwrap(); + assert_eq!(a_int, b_int); + assert_ne!(a_int, c_int); + } + + #[test] + fn test_abs_diff_eq() { + let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0000001]]).unwrap(); + let c = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.1]]).unwrap(); + + assert!(a.abs_diff_eq(&b, 1e-6)); + assert!(!a.abs_diff_eq(&c, 1e-6)); + } + + #[test] + fn test_relative_eq() { + let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0000001]]).unwrap(); + + assert!(relative_eq!(a, b, epsilon = 1e-6, max_relative = 1e-6)); + } + + #[test] + fn test_new_error() { + let result = DenseMatrix::new(2, 2, vec![1, 2, 3], true); + assert!(result.is_err()); + } + + #[test] + fn test_mut_array_iterator_mut_all_cases() { + // Case B: column-major, axis 1 (col-by-col) + let mut m1 = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], true).unwrap(); + m1.iterator_mut(1).for_each(|v| *v += 1); + assert_eq!(m1.values, vec![2, 3, 4, 5]); + + // Case A: column-major, axis 0 (row-by-row) + let mut m2 = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], true).unwrap(); + let vals: Vec = m2.iterator_mut(0).map(|v| *v).collect(); + assert_eq!(vals, vec![1, 3, 2, 4]); + m2.iterator_mut(0).for_each(|v| *v *= 2); + assert_eq!(m2.values, vec![2, 4, 6, 8]); + + // Case C: row-major, axis 0 (row-by-row) + let mut m3 = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], false).unwrap(); + m3.iterator_mut(0).for_each(|v| *v += 1); + assert_eq!(m3.values, vec![2, 3, 4, 5]); + + // Case D: row-major, axis 1 (col-by-col) + let mut m4 = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], false).unwrap(); + let vals: Vec = m4.iterator_mut(1).map(|v| *v).collect(); + assert_eq!(vals, vec![1, 3, 2, 4]); + m4.iterator_mut(1).for_each(|v| *v *= 2); + assert_eq!(m4.values, vec![2, 4, 6, 8]); + } + + #[test] + fn test_dense_matrix_mut_view_iter_mut_all_cases() { + // Case B: column-major, axis 1 (col-by-col) + let mut m1 = DenseMatrix::new(3, 3, (1..10).collect(), true).unwrap(); + { + let mut v = DenseMatrixMutView::new(&mut m1, 0..2, 0..2).unwrap(); + v.iter_mut(1).for_each(|v| *v = 0); + } + assert_eq!(m1.values, vec![0, 0, 3, 0, 0, 6, 7, 8, 9]); + + // Case A: column-major, axis 0 (row-by-row) + let mut m2 = DenseMatrix::new(3, 3, (1..10).collect(), true).unwrap(); + { + let mut v = DenseMatrixMutView::new(&mut m2, 0..2, 0..2).unwrap(); + let vals: Vec = v.iter_mut(0).map(|v| *v).collect(); + assert_eq!(vals, vec![1, 4, 2, 5]); + v.iter_mut(0).for_each(|v| *v = 0); + } + assert_eq!(m2.values, vec![0, 0, 3, 0, 0, 6, 7, 8, 9]); + + // Case C: row-major, axis 0 (row-by-row) + let mut m3 = DenseMatrix::new(3, 3, (1..10).collect(), false).unwrap(); + { + let mut v = DenseMatrixMutView::new(&mut m3, 0..2, 0..2).unwrap(); + v.iter_mut(0).for_each(|v| *v = 0); + } + assert_eq!(m3.values, vec![0, 0, 3, 0, 0, 6, 7, 8, 9]); + + // Case D: row-major, axis 1 (col-by-col) + let mut m4 = DenseMatrix::new(3, 3, (1..10).collect(), false).unwrap(); + { + let mut v = DenseMatrixMutView::new(&mut m4, 0..2, 0..2).unwrap(); + let vals: Vec = v.iter_mut(1).map(|v| *v).collect(); + assert_eq!(vals, vec![1, 4, 2, 5]); + v.iter_mut(1).for_each(|v| *v = 0); + } + assert_eq!(m4.values, vec![0, 0, 3, 0, 0, 6, 7, 8, 9]); + } + + #[test] + fn test_is_empty() { + let m = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], true).unwrap(); + assert!(!m.is_empty()); + let empty: DenseMatrix = DenseMatrix::new(0, 0, vec![], true).unwrap(); + assert!(empty.is_empty()); + } + + #[test] + fn test_stride_range_error() { + let _m = DenseMatrix::new(2, 2, vec![1, 2, 3, 4], true).unwrap(); + } + + #[test] + #[should_panic(expected = "Invalid index (2,0) for 2x2 matrix")] + fn test_get_out_of_bounds() { + let m = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + m.get((2, 0)); + } + + #[test] + fn test_transpose_row_major() { + let m = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false).unwrap(); + let mt = m.transpose(); + assert!(mt.column_major); + assert_eq!(mt.nrows, 3); + assert_eq!(mt.ncols, 2); + assert_eq!(mt.values, vec![1, 2, 3, 4, 5, 6]); + } + + #[test] + #[should_panic(expected = "For two dimensional array `axis` should be either 0 or 1")] + fn test_iterator_invalid_axis() { + let m = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let _ = m.iterator(2); + } + + #[test] + #[should_panic(expected = "For two dimensional array `axis` should be either 0 or 1")] + fn test_iterator_mut_invalid_axis() { + let mut m = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let _ = m.iterator_mut(2); + } + + #[test] + fn test_view_1d_access() { + let m = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let v_row = DenseMatrixView::new(&m, 0..1, 0..3).unwrap(); + assert_eq!( as Array>::shape(&v_row), 3); + assert_eq!( as Array>::get(&v_row, 1), &2); + + let v_col = DenseMatrixView::new(&m, 0..2, 1..2).unwrap(); + assert_eq!( as Array>::shape(&v_col), 2); + assert_eq!( as Array>::get(&v_col, 1), &5); + } + + #[test] + #[should_panic(expected = "This is neither a column nor a row")] + fn test_view_1d_access_invalid() { + let m = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let v = DenseMatrixView::new(&m, 0..2, 0..2).unwrap(); + let _ = as Array>::shape(&v); } } From 5714314abb848ca38368e571acd11866ab548f17 Mon Sep 17 00:00:00 2001 From: jackpots28 Date: Thu, 21 May 2026 15:17:45 -0500 Subject: [PATCH 5/5] Failed to fix fmt before pushing --- src/linalg/basic/matrix.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index b70a4070..64d2f1ea 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -1073,7 +1073,7 @@ mod tests { &[1. + f32::EPSILON, 2., 3.], &[4., 5., 6. + f32::EPSILON], ]) - .unwrap(); + .unwrap(); let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]) .unwrap(); @@ -1226,12 +1226,24 @@ mod tests { fn test_view_1d_access() { let m = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); let v_row = DenseMatrixView::new(&m, 0..1, 0..3).unwrap(); - assert_eq!( as Array>::shape(&v_row), 3); - assert_eq!( as Array>::get(&v_row, 1), &2); + assert_eq!( + as Array>::shape(&v_row), + 3 + ); + assert_eq!( + as Array>::get(&v_row, 1), + &2 + ); let v_col = DenseMatrixView::new(&m, 0..2, 1..2).unwrap(); - assert_eq!( as Array>::shape(&v_col), 2); - assert_eq!( as Array>::get(&v_col, 1), &5); + assert_eq!( + as Array>::shape(&v_col), + 2 + ); + assert_eq!( + as Array>::get(&v_col, 1), + &5 + ); } #[test]