diff --git a/regex-automata/src/meta/regex.rs b/regex-automata/src/meta/regex.rs
index ce3bae0fa..93fe6c0bb 100644
--- a/regex-automata/src/meta/regex.rs
+++ b/regex-automata/src/meta/regex.rs
@@ -892,6 +892,18 @@ impl Regex {
) -> SplitN<'r, 'h> {
SplitN { splits: self.split(input), limit }
}
+
+    /// Returns an iterator of spans that cuts the haystack into alternating
+    /// "between matches" and "match" pieces.
+    ///
+    /// For every match reported by [`Regex::find_iter`], the iterator first
+    /// yields the span running from the end of the previous match (or from
+    /// offset `0` for the first match) up to the start of the match, and then
+    /// yields the span of the match itself. Once there are no more matches,
+    /// one final span covering everything from the end of the last match to
+    /// the end of the haystack is yielded.
+    ///
+    /// Pieces are never skipped, so empty spans are produced when a match
+    /// sits at the very start or end of the haystack or when two matches are
+    /// adjacent. As a result, concatenating the yielded pieces in order
+    /// reproduces the haystack.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::meta::Regex;
+    ///
+    /// let re = Regex::new(r"[0-9]+")?;
+    /// let hay = "a1b22c";
+    /// let pieces: Vec<&str> =
+    ///     re.split_inclusive(hay).map(|span| &hay[span]).collect();
+    /// assert_eq!(pieces, vec!["a", "1", "b", "22", "c"]);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn split_inclusive<'r, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        input: I,
+    ) -> SplitInclusive<'r, 'h> {
+        SplitInclusive {
+            finder: self.find_iter(input),
+            // The first piece starts at the beginning of the haystack.
+            last: 0,
+            // No match is waiting to be handed out yet.
+            span_to_yield: None,
+        }
+    }
}
/// Lower level search routines that give more control.
@@ -2278,6 +2290,58 @@ impl<'r, 'h> Iterator for SplitN<'r, 'h> {
impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
+/// An iterator over the pieces of a haystack delimited by regex matches,
+/// where the matches themselves are yielded as pieces too.
+///
+/// Each item is a [`Span`] into the haystack. Items alternate between the
+/// text preceding a match and the match itself, and the iterator ends with
+/// the text following the last match. Pieces may be empty.
+///
+/// This iterator is created by [`Regex::split_inclusive`].
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` is the lifetime of the borrow of the `Regex` being searched with.
+/// * `'h` is the lifetime of the `Input` (and thus the haystack) searched.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 'h> {
+    /// The underlying iterator over successive matches.
+    finder: FindMatches<'r, 'h>,
+    /// The offset at which the next non-matching piece begins, i.e. the end
+    /// of the most recent match (`0` before any match has been seen). It is
+    /// set to one past the haystack length after the final piece has been
+    /// yielded, which marks the iterator as finished.
+    last: usize,
+    /// The span of a match that has been found but not yet yielded. It is
+    /// handed out by the call to `next` that follows the one which found it.
+    span_to_yield: Option<Span>,
+}
+
+impl<'r, 'h> SplitInclusive<'r, 'h> {
+    /// Borrows the `Input` that the wrapped match iterator is searching.
+    ///
+    /// While iterating, the `start` position of this `Input` may move, but
+    /// every other value on it is guaranteed to stay the same.
+    #[inline]
+    pub fn input(&self) -> &Input<'h> {
+        let matches = &self.finder;
+        matches.input()
+    }
+}
+
+impl<'r, 'h> Iterator for SplitInclusive<'r, 'h> {
+    type Item = Span;
+
+    /// Yields the next piece of the haystack.
+    ///
+    /// Pieces come out in haystack order: the text before a match, then the
+    /// match itself, and so on, finishing with the text after the last
+    /// match. Returns `None` once that trailing piece has been yielded.
+    fn next(&mut self) -> Option<Span> {
+        // A match found by the previous call is still waiting to be handed
+        // out; `take` yields it and clears the slot in one step.
+        if let Some(span) = self.span_to_yield.take() {
+            return Some(span);
+        }
+
+        match self.finder.next() {
+            None => {
+                let len = self.input().haystack().len();
+                if self.last > len {
+                    // The trailing piece was already yielded.
+                    None
+                } else {
+                    // Everything after the last match, possibly empty.
+                    let span = Span::from(self.last..len);
+                    self.last = len + 1; // Next call will return None
+                    Some(span)
+                }
+            }
+            Some(m) => {
+                // Yield the text preceding the match now...
+                let span = Span::from(self.last..m.start());
+                // ...and hold the match itself back for the next call.
+                self.span_to_yield = Some(Span::from(m.start()..m.end()));
+
+                self.last = m.end();
+                Some(span)
+            }
+        }
+    }
+}
+
+// Once the trailing piece has been yielded, `next` sets `last` past the
+// haystack length, so later calls take the `self.last > len` branch and
+// return `None`. NOTE(review): this presumes the wrapped `FindMatches` keeps
+// returning `None` after it is exhausted -- confirm.
+impl<'r, 'h> core::iter::FusedIterator for SplitInclusive<'r, 'h> {}
+
/// Represents mutable scratch space used by regex engines during a search.
///
/// Most of the regex engines in this crate require some kind of
@@ -3646,4 +3710,51 @@ mod tests {
let re = Regex::new(r"[a-zA-Z]+ing").unwrap();
assert_eq!(1, re.find_iter("tingling").count());
}
+
+    /// Checks `Regex::split_inclusive` against a table of
+    /// `(pattern, haystack, expected pieces)` cases, and verifies that the
+    /// pieces glue back together into the original haystack.
+    #[test]
+    fn split_inclusive() {
+        let cases = [
+            // A single-character delimiter in the middle.
+            (r"(x)", "1x2", vec!["1", "x", "2"]),
+            // A delimiter described by a negated class.
+            (r"([^0-9]+)", "1-2", vec!["1", "-", "2"]),
+            // An explicit ASCII whitespace class; a run of whitespace is
+            // reported as a single piece.
+            (
+                r"([\r\n\t\f\v ]+)",
+                "this is a \n\ntest",
+                vec!["this", " ", "is", " ", "a", " \n\n", "test"],
+            ),
+            // Matches at both ends of the haystack and adjacent matches
+            // produce empty pieces.
+            (
+                r"([^0-9a-zA-Z_])",
+                " C# is great! (not actually) :)",
+                vec![
+                    "", " ", "C", "#", "", " ", "is", " ", "great", "!", "",
+                    " ", "", "(", "not", " ", "actually", ")", "", " ", "",
+                    ":", "", ")", "",
+                ],
+            ),
+            // Here the matches are the words and the non-matching pieces
+            // are the punctuation and spaces between them.
+            (
+                r"([a-zA-Z](?:[a-zA-Z']*[a-zA-Z])?)",
+                r#"He said, "I'd like to eat cake!""#,
+                vec![
+                    "", "He", " ", "said", r#", ""#, "I'd", " ", "like", " ",
+                    "to", " ", "eat", " ", "cake", r#"!""#,
+                ],
+            ),
+        ];
+        for (pattern, text, expected_output) in cases {
+            let re = Regex::new(pattern).unwrap();
+            let mut out = Vec::new();
+            for sp in re.split_inclusive(text) {
+                out.push(&text[sp]);
+            }
+
+            assert_eq!(
+                out, expected_output,
+                "Regex: {}, Input: {}",
+                pattern, text
+            );
+
+            // No byte of the haystack may be lost or duplicated: joining
+            // the pieces back together must reproduce the original text.
+            assert_eq!(out.concat(), text);
+        }
+    }
}