Implement group checks iteratively.

olson-sean-k · Jan 26, 2024 · 7223de9 · 7223de9
1 parent 043caf3
commit 7223de9
Show file tree

Hide file tree

Showing 4 changed files with 96 additions and 165 deletions.
diff --git a/src/rule.rs b/src/rule.rs
@@ -15,6 +15,7 @@ use itertools::Itertools as _;
 #[cfg(feature = "miette")]
 use miette::{Diagnostic, LabeledSpan, SourceCode};
 use std::borrow::Cow;
+use std::collections::VecDeque;
 use std::convert::Infallible;
 #[cfg(feature = "miette")]
 use std::fmt::Display;
@@ -477,7 +478,7 @@ where
 
     fn is_some_and_any_in<'i, 't, A, R, I, P>(
         token: Option<&'i Token<'t, A>>,
-        read: R,
+        traversal: R,
         mut predicate: P,
     ) -> bool
     where
@@ -486,7 +487,7 @@ where
         P: FnMut(&'i Token<'t, A>) -> bool,
     {
         token.map_or(false, |token| {
-            read(token).any(move |TokenEntry { token, .. }| predicate(token))
+            traversal(token).any(move |TokenEntry { token, .. }| predicate(token))
         })
     }
 
@@ -520,7 +521,7 @@ where
         #[allow(clippy::ptr_arg)] expression: &'i Cow<'t, str>,
         token: &'i Token<'t, A>,
         label: &'static str,
-    ) -> impl 'i + Copy + Fn(CorrelatedError) -> RuleError<'t>
+    ) -> impl 'i + Copy + FnOnce(CorrelatedError) -> RuleError<'t>
     where
         't: 'i,
         A: Spanned,
@@ -534,51 +535,6 @@ where
         }
     }
 
-    // TODO: Implement this iteratively.
-    fn recurse<'i, 't, A>(
-        // This is a somewhat unusual API, but it allows the lifetime `'t` of the `Cow` to be
-        // properly forwarded to output values (`RuleError`).
-        #[allow(clippy::ptr_arg)] expression: &Cow<'t, str>,
-        concatenation: &'i [Token<'t, A>],
-        outer: Outer<'i, 't, A>,
-    ) -> Result<(), RuleError<'t>>
-    where
-        't: 'i,
-        A: Spanned,
-    {
-        use BranchKind::{Alternation, Repetition};
-
-        for (left, token, right) in concatenation.iter().adjacent().map(Adjacency::into_tuple) {
-            match token.as_branch() {
-                Some(Alternation(ref alternation)) => {
-                    let outer = outer.or(left, right);
-                    let diagnose = diagnose(expression, token, "in this alternation");
-                    for token in alternation.tokens() {
-                        let tokens = token.concatenation();
-                        if let Some(terminals) = tokens.terminals() {
-                            check_group(terminals, outer).map_err(diagnose)?;
-                            check_group_alternative(terminals, outer).map_err(diagnose)?;
-                        }
-                        recurse(expression, tokens, outer)?;
-                    }
-                },
-                Some(Repetition(ref repetition)) => {
-                    let outer = outer.or(left, right);
-                    let diagnose = diagnose(expression, token, "in this repetition");
-                    let tokens = repetition.token().concatenation();
-                    if let Some(terminals) = tokens.terminals() {
-                        check_group(terminals, outer).map_err(diagnose)?;
-                        check_group_repetition(terminals, outer, repetition.cardinality())
-                            .map_err(diagnose)?;
-                    }
-                    recurse(expression, tokens, outer)?;
-                },
-                _ => {},
-            }
-        }
-        Ok(())
-    }
-
     fn check_group<'i, 't, A>(
         terminals: Terminals<&'i Token<'t, A>>,
         outer: Outer<'i, 't, A>,
@@ -773,21 +729,58 @@ where
         }
     }
 
-    recurse(
-        tree.expression(),
-        tree.as_token().concatenation(),
-        Outer::default(),
-    )
+    let mut outer = Outer::default();
+    let mut tokens: VecDeque<_> = Some(tree.as_token()).into_iter().collect();
+    while let Some(token) = tokens.pop_front() {
+        use BranchKind::{Alternation, Repetition};
+
+        for (left, token, right) in token
+            .concatenation()
+            .iter()
+            .adjacent()
+            .map(Adjacency::into_tuple)
+        {
+            match token.as_branch() {
+                Some(Alternation(ref alternation)) => {
+                    outer = outer.or(left, right);
+                    let diagnose = diagnose(tree.expression(), token, "in this alternation");
+                    for token in alternation.tokens() {
+                        let concatenation = token.concatenation();
+                        if let Some(terminals) = concatenation.terminals() {
+                            check_group(terminals, outer).map_err(diagnose)?;
+                            check_group_alternative(terminals, outer).map_err(diagnose)?;
+                        }
+                    }
+                    tokens.extend(alternation.tokens());
+                },
+                Some(Repetition(ref repetition)) => {
+                    outer = outer.or(left, right);
+                    let diagnose = diagnose(tree.expression(), token, "in this repetition");
+                    let token = repetition.token();
+                    let concatenation = token.concatenation();
+                    if let Some(terminals) = concatenation.terminals() {
+                        check_group(terminals, outer).map_err(diagnose)?;
+                        check_group_repetition(terminals, outer, repetition.cardinality())
+                            .map_err(diagnose)?;
+                    }
+                    tokens.push_back(token);
+                },
+                _ => {},
+            }
+        }
+    }
+    Ok(())
 }
 
 // TODO: `Cardinality` orders its inputs and so repetition expressions like `<a:6,1>` where the
-//       finite bounds are in nonconventional order cannot be detected here (and construct
-//       reasonable tokens). The parser disallows any ambiguous syntax, but a question remains:
-//       should such bounds be rejected? If so, `Cardinality` may need an unchecked constructor.
+//       finite bounds are in nonconventional order cannot be detected here. The parser disallows
+//       any ambiguous syntax, but a question remains: should such bounds be rejected? If so,
+//       `Cardinality` may need an unchecked constructor.
 //
 //       For now, this function only checks for empty bounds like `<a:0,0>`.
 //
-//       This highlights the advantages of using separate syntax and semantic (intermediate) trees.
+//       This highlights the advantages of using separate syntactic and semantic (intermediate)
+//       trees.
 fn cardinality<'t, A>(tree: &Tokenized<'t, A>) -> Result<(), RuleError<'t>>
 where
     A: Spanned,

diff --git a/src/token/parse.rs b/src/token/parse.rs
@@ -538,7 +538,7 @@ pub fn parse(expression: &str) -> Result<Tokenized<'_, ExpressionMetadata>, Pars
     if expression.is_empty() {
         Ok(Tokenized {
             expression: expression.into(),
-            token: Token::empty((0, 0)),
+            token: Token::empty(Default::default()),
         })
     }
     else {

diff --git a/src/token/walk.rs b/src/token/walk.rs
@@ -20,49 +20,6 @@ impl<'i, 't, A> Isomeric for TokenFeed<'i, 't, A> {
     }
 }
 
-//#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-//pub enum Position {
-//    Conjunctive { depth: usize },
-//    Disjunctive { depth: usize, branch: usize },
-//}
-//
-//impl Position {
-//    pub fn depth(&self) -> usize {
-//        match self {
-//            Position::Conjunctive { ref depth } | Position::Disjunctive { ref depth, .. } => *depth,
-//        }
-//    }
-//
-//    // This may appear to operate in place.
-//    #[must_use]
-//    fn conjunction(self) -> Self {
-//        match self {
-//            Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
-//                Position::Conjunctive { depth: depth + 1 }
-//            },
-//        }
-//    }
-//
-//    // This may appear to operate in place.
-//    #[must_use]
-//    fn disjunction(self, branch: usize) -> Self {
-//        match self {
-//            Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
-//                Position::Disjunctive {
-//                    depth: depth + 1,
-//                    branch,
-//                }
-//            },
-//        }
-//    }
-//}
-//
-//impl Default for Position {
-//    fn default() -> Self {
-//        Position::Conjunctive { depth: 0 }
-//    }
-//}
-
 #[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
 pub struct Depth {
     pub conjunctive: usize,
@@ -119,6 +76,8 @@ impl Position {
     }
 }
 
+// Together with `Child`, this type prevents `Enqueue` implementers from enqueuing arbitrary tokens
+// that are not associated with a branch.
 #[derive(Debug)]
 #[repr(transparent)]
 pub struct Parent<T>(T);
@@ -139,6 +98,8 @@ impl<'i, 't, A> TokenParent<'i, 't, A> {
     }
 }
 
+// Together with `Parent`, this type prevents `Enqueue` implementers from enqueuing arbitrary
+// tokens that are not associated with a branch.
 #[derive(Debug)]
 #[repr(transparent)]
 pub struct Child<T>(T);
@@ -182,42 +143,6 @@ impl Enqueue for Reverse {
     }
 }
 
-#[derive(Default)]
-pub struct Starting;
-
-impl Enqueue for Starting {
-    fn enqueue<'i, 't, A>(
-        &mut self,
-        parent: TokenParent<'i, 't, A>,
-    ) -> impl Iterator<Item = TokenChild<'i, 't, A>> {
-        let composition = parent.as_ref().composition();
-        let tokens = parent.into_tokens();
-        let n = match composition {
-            Composition::Conjunctive(_) => 1,
-            Composition::Disjunctive(_) => tokens.len(),
-        };
-        tokens.take(n)
-    }
-}
-
-#[derive(Default)]
-pub struct Ending;
-
-impl Enqueue for Ending {
-    fn enqueue<'i, 't, A>(
-        &mut self,
-        parent: TokenParent<'i, 't, A>,
-    ) -> impl Iterator<Item = TokenChild<'i, 't, A>> {
-        let composition = parent.as_ref().composition();
-        let tokens = parent.into_tokens();
-        let n = match composition {
-            Composition::Conjunctive(_) => tokens.len().saturating_sub(1),
-            Composition::Disjunctive(_) => 0,
-        };
-        tokens.skip(n)
-    }
-}
-
 #[derive(Clone, Copy, Debug)]
 pub struct WalkEntry<T> {
     pub position: Position,
@@ -305,28 +230,19 @@ where
     type Feed = TokenFeed<'i, 't, A>;
 }
 
-// TODO: This function outputs an `Iterator` rather than a `HierarchicalIterator`, because any
-//       filtering performed by the `Enqueue` bypasses `SeparatingFilter`. This is a purely
-//       philosophical choice: does such filtering betray the API in some way or not?
-pub fn read<'i, 't, T, E>(
+pub fn with_reader<'i, 't, T, E>(
     tree: &'i T,
     reader: E,
-) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, T::Annotation>>
+) -> impl 'i + Iterator<Item = TokenEntry<'i, 't, T::Annotation>>
 where
     't: 'i,
     T: TokenTree<'t>,
     E: 'i + Enqueue,
 {
-    Walk {
-        branch: None,
-        tokens: Some(TokenEntry {
-            position: Position::default(),
-            token: tree.as_token(),
-        })
-        .into_iter()
-        .collect(),
-        reader,
-    }
+    // The bound on `HierarchicalIterator` in the output is removed here. This isn't strictly
+    // necessary, but may avoid confusion about the interaction of `Enqueue` and separating filters
+    // (which operate entirely independently here).
+    with_reader_unchecked(tree, reader)
 }
 
 pub fn forward<'i, 't, T>(
@@ -336,7 +252,7 @@ where
     't: 'i,
     T: TokenTree<'t>,
 {
-    read(tree, Forward::default())
+    with_reader_unchecked(tree, Forward::default())
 }
 
 pub fn reverse<'i, 't, T>(
@@ -346,7 +262,7 @@ where
     't: 'i,
     T: TokenTree<'t>,
 {
-    read(tree, Reverse::default())
+    with_reader_unchecked(tree, Reverse::default())
 }
 
 pub fn starting<'i, 't, T>(
@@ -369,17 +285,39 @@ where
     filter_tree_tail(reverse(tree))
 }
 
-// TODO: There are some interesting tradeoffs regarding the above implementation of `starting` and
-//       `ending`.
+// NOTE: Tokens discarded by `reader` are **not** present in the feed of the output.
+fn with_reader_unchecked<'i, 't, T, E>(
+    tree: &'i T,
+    reader: E,
+) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, T::Annotation>>
+where
+    't: 'i,
+    T: TokenTree<'t>,
+    E: 'i + Enqueue,
+{
+    Walk {
+        branch: None,
+        tokens: Some(TokenEntry {
+            position: Position::default(),
+            token: tree.as_token(),
+        })
+        .into_iter()
+        .collect(),
+        reader,
+    }
+}
+
+// This function is used to implement `starting` and `ending`. There are some interesting
+// considerations between this implementation via `HierarchicalIterator` and an implementation via
+// `Enqueue`, which can accomplish the same.
 //
-//       Implementing these via `HierarchicalIterator` maintains a `SeparatingFilter`
-//       implementation over the entire token tree with no caveats. However, this approach must
-//       examine every token beneath a branch and its position.
+// This function maintains a `SeparatingFilter` implementation over the entire token tree with no
+// caveats: all tokens appear in the feed. However, this approach must examine every token and
+// associated `Position`.
 //
-//       Implementing these via `Enqueue` is perhaps more straightforward and efficient. Tokens are
-//       manipulated in batches, and so runs of tokens are trivially discarded without examination.
-//       On the other hand, this filtering bypasses `SepartingFilter` and so discarded tokens
-//       cannot be observed.
+// `Enqueue` is perhaps more straightforward and efficient. Tokens are manipulated in batches, and
+// so runs of tokens are trivially discarded with little or no examination. On the other hand, such
+// filtering bypasses `SepartingFilter`, and so discarded tokens cannot be observed in the feed.
 fn filter_tree_tail<'i, 't, A>(
     tokens: impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, A>>,
 ) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, A>>

diff --git a/src/walk/glob.rs b/src/walk/glob.rs
@@ -269,8 +269,8 @@ impl WalkProgram {
             // matches a glob against the subtrees in a path and there is arguably an implicit
             // empty tree. This is also more composable when partitioning and (re)building paths.
             //
-            // The result is that matching the empty glob `` against the path `foo` yields `foo`
-            // and only `foo` (assuming that the path exists).
+            // The result is that matching an empty glob against the path `foo` yields `foo` and
+            // only `foo` (assuming that the path exists).
             components: if glob.is_empty() {
                 vec![]
             }