From 514a0f933ae3c1ef039107c9f4a860f4461c6a2f Mon Sep 17 00:00:00 2001
From: "John T. Wodder II"
Date: Sun, 12 May 2024 23:26:33 -0400
Subject: [PATCH 1/2] [WIP] find_indices()

---
 src/iter/find_indices.rs | 78 ++++++++++++++++++++++++++++++++++++++++
 src/iter/mod.rs          |  2 ++
 src/pattern.rs           | 19 ++++++++++
 3 files changed, 99 insertions(+)
 create mode 100644 src/iter/find_indices.rs

diff --git a/src/iter/find_indices.rs b/src/iter/find_indices.rs
new file mode 100644
index 0000000..4fb4449
--- /dev/null
+++ b/src/iter/find_indices.rs
@@ -0,0 +1,78 @@
+use crate::pattern::NewlinePattern;
+use core::iter::FusedIterator;
+
+/// Iterator over the `(start, end)` byte indices of each match of a
+/// [`NewlinePattern`] in a string.
+///
+/// Created by [`NewlinePattern::find_indices()`].  `next()` yields matches
+/// starting from the front of the string and `next_back()` yields matches
+/// starting from the back.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FindIndices<'a, NP> {
+    /// The pattern being searched for
+    pattern: &'a NP,
+    /// The portion of the input that has not been searched yet
+    s: &'a str,
+    /// Index in the original input at which `s` starts
+    offset: usize,
+}
+
+impl<'a, NP> FindIndices<'a, NP> {
+    /// Create an iterator over the matches of `pattern` in `s`.
+    pub(crate) fn new(pattern: &'a NP, s: &'a str) -> Self {
+        FindIndices {
+            pattern,
+            s,
+            offset: 0,
+        }
+    }
+}
+
+impl<NP: NewlinePattern> Iterator for FindIndices<'_, NP> {
+    type Item = (usize, usize);
+
+    /// Return the indices of the first match in the unsearched input, or
+    /// `None` if no matches remain.
+    fn next(&mut self) -> Option<(usize, usize)> {
+        if self.s.is_empty() {
+            return None;
+        }
+        let Some((start, end)) = self.pattern.search(self.s) else {
+            // No matches remain; empty the remainder so that later calls
+            // return `None` without searching again.
+            self.s = "";
+            return None;
+        };
+        // `start` and `end` are relative to `self.s`.  Drop everything up to
+        // the end of the match, then shift the indices by `self.offset` so
+        // that they refer to the original input.
+        self.s = &self.s[end..];
+        let start = start.saturating_add(self.offset);
+        let end = end.saturating_add(self.offset);
+        self.offset = end;
+        Some((start, end))
+    }
+}
+
+// Once the remainder is empty, `next()` only ever returns `None`.
+impl<NP: NewlinePattern> FusedIterator for FindIndices<'_, NP> {}
+
+impl<NP: NewlinePattern> DoubleEndedIterator for FindIndices<'_, NP> {
+    /// Return the indices of the last match in the unsearched input, or
+    /// `None` if no matches remain.
+    fn next_back(&mut self) -> Option<(usize, usize)> {
+        if self.s.is_empty() {
+            return None;
+        }
+        let Some((start, end)) = self.pattern.rsearch(self.s) else {
+            self.s = "";
+            return None;
+        };
+        // Trimming the back leaves the start of `self.s` where it was, so
+        // `self.offset` does not change.
+        self.s = &self.s[..start];
+        let start = start.saturating_add(self.offset);
+        let end = end.saturating_add(self.offset);
+        Some((start, end))
+    }
+}
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
index 6d378f3..a38a9dc 100644
--- a/src/iter/mod.rs
+++ b/src/iter/mod.rs
@@ -1,6 +1,7 @@
 //! 
Iterator types
 mod complement;
 mod diff;
+mod find_indices;
 mod inner;
 mod intersection;
 mod into_iter;
@@ -8,6 +9,7 @@ mod symdiff;
 mod union;
 pub use self::complement::*;
 pub use self::diff::*;
+pub use self::find_indices::*;
 pub use self::intersection::*;
 pub use self::into_iter::*;
 pub use self::symdiff::*;
diff --git a/src/pattern.rs b/src/pattern.rs
index d2ed584..672afee 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -1,3 +1,4 @@
+use crate::iter::FindIndices;
 use crate::nl::Newline;
 use crate::nlset::NewlineSet;
 
@@ -13,6 +14,13 @@ mod private {
 pub trait NewlinePattern: private::Sealed {
     fn search(&self, s: &str) -> Option<(usize, usize)>;
     fn rsearch(&self, s: &str) -> Option<(usize, usize)>;
+
+    fn find_indices<'a>(&'a self, s: &'a str) -> FindIndices<'a, Self>
+    where
+        Self: Sized,
+    {
+        FindIndices::new(self, s)
+    }
 }
 
 impl NewlinePattern for Newline {
@@ -180,4 +188,15 @@ mod tests {
             }
         }
     }
+
+    // newline: find_indices()
+    // CR ~ \r\r\n
+    // CRLF ~ \r\r\n
+    // rev()
+    // next() mixed with next_back()
+
+    // newline set: find_indices()
+    // {CR, CRLF} ~ \r\r\n
+    // rev()
+    // next() mixed with next_back()
 }

From e38bdd246fe1966d10f5c2e986e36d5b2569dffe Mon Sep 17 00:00:00 2001
From: "John T. 
Wodder II"
Date: Thu, 16 May 2024 14:12:40 -0400
Subject: [PATCH 2/2] [WIP] ASCII tests

---
 src/pattern.rs | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/src/pattern.rs b/src/pattern.rs
index 672afee..cedf6c9 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -187,6 +187,104 @@ mod tests {
                 assert!(nlset.contains(Newline::try_from(&s[start..end]).unwrap()));
             }
         }
+
+        mod find_indices {
+            use super::*;
+
+            mod ascii {
+                use super::*;
+
+                // An empty string yields nothing from either end.
+                #[test]
+                fn empty() {
+                    let mut matches = NewlineSet::ASCII.find_indices("");
+                    assert_eq!(matches.next(), None);
+                    assert_eq!(matches.next(), None);
+                    assert_eq!(matches.next_back(), None);
+                    assert_eq!(matches.next_back(), None);
+                }
+
+                // A string without any newlines yields nothing from either end.
+                #[test]
+                fn no_newline() {
+                    let mut matches = NewlineSet::ASCII.find_indices("foobar");
+                    assert_eq!(matches.next(), None);
+                    assert_eq!(matches.next(), None);
+                    assert_eq!(matches.next_back(), None);
+                    assert_eq!(matches.next_back(), None);
+                }
+
+                // A lone newline is reported with the same indices from either end.
+                #[rstest]
+                #[case("\n", (0, 1))]
+                #[case("\r", (0, 1))]
+                #[case("\r\n", (0, 2))]
+                #[case("foo\n", (3, 4))]
+                #[case("foo\r", (3, 4))]
+                #[case("foo\r\n", (3, 5))]
+                #[case("\nfoo", (0, 1))]
+                #[case("\rfoo", (0, 1))]
+                #[case("\r\nfoo", (0, 2))]
+                #[case("foo\nbar", (3, 4))]
+                #[case("foo\rbar", (3, 4))]
+                #[case("foo\r\nbar", (3, 5))]
+                #[case("foo“\n”bar", (6, 7))]
+                #[case("foo“\r”bar", (6, 7))]
+                #[case("foo“\r\n”bar", (6, 8))]
+                fn one_newline(#[case] s: &str, #[case] span: (usize, usize)) {
+                    let mut fwd = NewlineSet::ASCII.find_indices(s);
+                    assert_eq!(fwd.next(), Some(span));
+                    assert_eq!(fwd.next(), None);
+                    assert_eq!(fwd.next(), None);
+                    assert_eq!(fwd.next_back(), None);
+                    assert_eq!(fwd.next_back(), None);
+                    let mut bwd = NewlineSet::ASCII.find_indices(s);
+                    assert_eq!(bwd.next_back(), Some(span));
+                    assert_eq!(bwd.next_back(), None);
+                    assert_eq!(bwd.next_back(), None);
+                    assert_eq!(bwd.next(), None);
+                    assert_eq!(bwd.next(), None);
+                }
+
+                // Two newlines come out in opposite orders from the front and the back.
+                #[rstest]
+                #[case("\n\r", (0, 1), (1, 2))]
+                #[case("foo\n\rbar", (3, 4), (4, 5))]
+                #[case("foo\n\nbar", (3, 4), (4, 5))]
+                #[case("foo\r\rbar", (3, 4), (4, 5))]
+                #[case("foo\nbar\n", (3, 4), (7, 8))]
+                #[case("foo\rbar\r", (3, 4), (7, 8))]
+                #[case("foo\r\nbar\r\n", (3, 5), (8, 10))]
+                fn two_newlines(
+                    #[case] s: &str,
+                    #[case] first: (usize, usize),
+                    #[case] second: (usize, usize),
+                ) {
+                    let mut fwd = NewlineSet::ASCII.find_indices(s);
+                    assert_eq!(fwd.next(), Some(first));
+                    assert_eq!(fwd.next(), Some(second));
+                    assert_eq!(fwd.next(), None);
+                    assert_eq!(fwd.next(), None);
+                    assert_eq!(fwd.next_back(), None);
+                    assert_eq!(fwd.next_back(), None);
+                    let mut bwd = NewlineSet::ASCII.find_indices(s);
+                    assert_eq!(bwd.next_back(), Some(second));
+                    assert_eq!(bwd.next_back(), Some(first));
+                    assert_eq!(bwd.next_back(), None);
+                    assert_eq!(bwd.next_back(), None);
+                    assert_eq!(bwd.next(), None);
+                    assert_eq!(bwd.next(), None);
+                    // Take one match from each end of the same iterator.
+                    let mut both = NewlineSet::ASCII.find_indices(s);
+                    assert_eq!(both.next(), Some(first));
+                    assert_eq!(both.next_back(), Some(second));
+                    assert_eq!(both.next(), None);
+                    assert_eq!(both.next(), None);
+                    assert_eq!(both.next_back(), None);
+                    assert_eq!(both.next_back(), None);
+                }
+            }
+        }
     }
 
     // newline: find_indices()