Skip to content

Commit

Permalink
Implement group checks iteratively.
Browse files Browse the repository at this point in the history
  • Loading branch information
olson-sean-k committed Jan 26, 2024
1 parent 043caf3 commit 7223de9
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 165 deletions.
107 changes: 50 additions & 57 deletions src/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use itertools::Itertools as _;
#[cfg(feature = "miette")]
use miette::{Diagnostic, LabeledSpan, SourceCode};
use std::borrow::Cow;
use std::collections::VecDeque;
use std::convert::Infallible;
#[cfg(feature = "miette")]
use std::fmt::Display;
Expand Down Expand Up @@ -477,7 +478,7 @@ where

fn is_some_and_any_in<'i, 't, A, R, I, P>(
token: Option<&'i Token<'t, A>>,
read: R,
traversal: R,
mut predicate: P,
) -> bool
where
Expand All @@ -486,7 +487,7 @@ where
P: FnMut(&'i Token<'t, A>) -> bool,
{
token.map_or(false, |token| {
read(token).any(move |TokenEntry { token, .. }| predicate(token))
traversal(token).any(move |TokenEntry { token, .. }| predicate(token))
})
}

Expand Down Expand Up @@ -520,7 +521,7 @@ where
#[allow(clippy::ptr_arg)] expression: &'i Cow<'t, str>,
token: &'i Token<'t, A>,
label: &'static str,
) -> impl 'i + Copy + Fn(CorrelatedError) -> RuleError<'t>
) -> impl 'i + Copy + FnOnce(CorrelatedError) -> RuleError<'t>
where
't: 'i,
A: Spanned,
Expand All @@ -534,51 +535,6 @@ where
}
}

// TODO: Implement this iteratively.
fn recurse<'i, 't, A>(
// This is a somewhat unusual API, but it allows the lifetime `'t` of the `Cow` to be
// properly forwarded to output values (`RuleError`).
#[allow(clippy::ptr_arg)] expression: &Cow<'t, str>,
concatenation: &'i [Token<'t, A>],
outer: Outer<'i, 't, A>,
) -> Result<(), RuleError<'t>>
where
't: 'i,
A: Spanned,
{
use BranchKind::{Alternation, Repetition};

for (left, token, right) in concatenation.iter().adjacent().map(Adjacency::into_tuple) {
match token.as_branch() {
Some(Alternation(ref alternation)) => {
let outer = outer.or(left, right);
let diagnose = diagnose(expression, token, "in this alternation");
for token in alternation.tokens() {
let tokens = token.concatenation();
if let Some(terminals) = tokens.terminals() {
check_group(terminals, outer).map_err(diagnose)?;
check_group_alternative(terminals, outer).map_err(diagnose)?;
}
recurse(expression, tokens, outer)?;
}
},
Some(Repetition(ref repetition)) => {
let outer = outer.or(left, right);
let diagnose = diagnose(expression, token, "in this repetition");
let tokens = repetition.token().concatenation();
if let Some(terminals) = tokens.terminals() {
check_group(terminals, outer).map_err(diagnose)?;
check_group_repetition(terminals, outer, repetition.cardinality())
.map_err(diagnose)?;
}
recurse(expression, tokens, outer)?;
},
_ => {},
}
}
Ok(())
}

fn check_group<'i, 't, A>(
terminals: Terminals<&'i Token<'t, A>>,
outer: Outer<'i, 't, A>,
Expand Down Expand Up @@ -773,21 +729,58 @@ where
}
}

recurse(
tree.expression(),
tree.as_token().concatenation(),
Outer::default(),
)
let mut outer = Outer::default();
let mut tokens: VecDeque<_> = Some(tree.as_token()).into_iter().collect();
while let Some(token) = tokens.pop_front() {
use BranchKind::{Alternation, Repetition};

for (left, token, right) in token
.concatenation()
.iter()
.adjacent()
.map(Adjacency::into_tuple)
{
match token.as_branch() {
Some(Alternation(ref alternation)) => {
outer = outer.or(left, right);
let diagnose = diagnose(tree.expression(), token, "in this alternation");
for token in alternation.tokens() {
let concatenation = token.concatenation();
if let Some(terminals) = concatenation.terminals() {
check_group(terminals, outer).map_err(diagnose)?;
check_group_alternative(terminals, outer).map_err(diagnose)?;
}
}
tokens.extend(alternation.tokens());
},
Some(Repetition(ref repetition)) => {
outer = outer.or(left, right);
let diagnose = diagnose(tree.expression(), token, "in this repetition");
let token = repetition.token();
let concatenation = token.concatenation();
if let Some(terminals) = concatenation.terminals() {
check_group(terminals, outer).map_err(diagnose)?;
check_group_repetition(terminals, outer, repetition.cardinality())
.map_err(diagnose)?;
}
tokens.push_back(token);
},
_ => {},
}
}
}
Ok(())
}

// TODO: `Cardinality` orders its inputs and so repetition expressions like `<a:6,1>` where the
// finite bounds are in nonconventional order cannot be detected here (and construct
// reasonable tokens). The parser disallows any ambiguous syntax, but a question remains:
// should such bounds be rejected? If so, `Cardinality` may need an unchecked constructor.
// finite bounds are in nonconventional order cannot be detected here. The parser disallows
// any ambiguous syntax, but a question remains: should such bounds be rejected? If so,
// `Cardinality` may need an unchecked constructor.
//
// For now, this function only checks for empty bounds like `<a:0,0>`.
//
// This highlights the advantages of using separate syntax and semantic (intermediate) trees.
// This highlights the advantages of using separate syntactic and semantic (intermediate)
// trees.
fn cardinality<'t, A>(tree: &Tokenized<'t, A>) -> Result<(), RuleError<'t>>
where
A: Spanned,
Expand Down
2 changes: 1 addition & 1 deletion src/token/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ pub fn parse(expression: &str) -> Result<Tokenized<'_, ExpressionMetadata>, Pars
if expression.is_empty() {
Ok(Tokenized {
expression: expression.into(),
token: Token::empty((0, 0)),
token: Token::empty(Default::default()),
})
}
else {
Expand Down
148 changes: 43 additions & 105 deletions src/token/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,49 +20,6 @@ impl<'i, 't, A> Isomeric for TokenFeed<'i, 't, A> {
}
}

//#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
//pub enum Position {
// Conjunctive { depth: usize },
// Disjunctive { depth: usize, branch: usize },
//}
//
//impl Position {
// pub fn depth(&self) -> usize {
// match self {
// Position::Conjunctive { ref depth } | Position::Disjunctive { ref depth, .. } => *depth,
// }
// }
//
// // This may appear to operate in place.
// #[must_use]
// fn conjunction(self) -> Self {
// match self {
// Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
// Position::Conjunctive { depth: depth + 1 }
// },
// }
// }
//
// // This may appear to operate in place.
// #[must_use]
// fn disjunction(self, branch: usize) -> Self {
// match self {
// Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
// Position::Disjunctive {
// depth: depth + 1,
// branch,
// }
// },
// }
// }
//}
//
//impl Default for Position {
// fn default() -> Self {
// Position::Conjunctive { depth: 0 }
// }
//}

#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct Depth {
pub conjunctive: usize,
Expand Down Expand Up @@ -119,6 +76,8 @@ impl Position {
}
}

// Together with `Child`, this type prevents `Enqueue` implementers from enqueuing arbitrary tokens
// that are not associated with a branch.
#[derive(Debug)]
#[repr(transparent)]
pub struct Parent<T>(T);
Expand All @@ -139,6 +98,8 @@ impl<'i, 't, A> TokenParent<'i, 't, A> {
}
}

// Together with `Parent`, this type prevents `Enqueue` implementers from enqueuing arbitrary
// tokens that are not associated with a branch.
#[derive(Debug)]
#[repr(transparent)]
pub struct Child<T>(T);
Expand Down Expand Up @@ -182,42 +143,6 @@ impl Enqueue for Reverse {
}
}

#[derive(Default)]
pub struct Starting;

impl Enqueue for Starting {
fn enqueue<'i, 't, A>(
&mut self,
parent: TokenParent<'i, 't, A>,
) -> impl Iterator<Item = TokenChild<'i, 't, A>> {
let composition = parent.as_ref().composition();
let tokens = parent.into_tokens();
let n = match composition {
Composition::Conjunctive(_) => 1,
Composition::Disjunctive(_) => tokens.len(),
};
tokens.take(n)
}
}

#[derive(Default)]
pub struct Ending;

impl Enqueue for Ending {
fn enqueue<'i, 't, A>(
&mut self,
parent: TokenParent<'i, 't, A>,
) -> impl Iterator<Item = TokenChild<'i, 't, A>> {
let composition = parent.as_ref().composition();
let tokens = parent.into_tokens();
let n = match composition {
Composition::Conjunctive(_) => tokens.len().saturating_sub(1),
Composition::Disjunctive(_) => 0,
};
tokens.skip(n)
}
}

#[derive(Clone, Copy, Debug)]
pub struct WalkEntry<T> {
pub position: Position,
Expand Down Expand Up @@ -305,28 +230,19 @@ where
type Feed = TokenFeed<'i, 't, A>;
}

// TODO: This function outputs an `Iterator` rather than a `HierarchicalIterator`, because any
// filtering performed by the `Enqueue` bypasses `SeparatingFilter`. This is a purely
// philosophical choice: does such filtering betray the API in some way or not?
pub fn read<'i, 't, T, E>(
pub fn with_reader<'i, 't, T, E>(
tree: &'i T,
reader: E,
) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, T::Annotation>>
) -> impl 'i + Iterator<Item = TokenEntry<'i, 't, T::Annotation>>
where
't: 'i,
T: TokenTree<'t>,
E: 'i + Enqueue,
{
Walk {
branch: None,
tokens: Some(TokenEntry {
position: Position::default(),
token: tree.as_token(),
})
.into_iter()
.collect(),
reader,
}
// The bound on `HierarchicalIterator` in the output is removed here. This isn't strictly
// necessary, but may avoid confusion about the interaction of `Enqueue` and separating filters
// (which operate entirely independently here).
with_reader_unchecked(tree, reader)
}

pub fn forward<'i, 't, T>(
Expand All @@ -336,7 +252,7 @@ where
't: 'i,
T: TokenTree<'t>,
{
read(tree, Forward::default())
with_reader_unchecked(tree, Forward::default())
}

pub fn reverse<'i, 't, T>(
Expand All @@ -346,7 +262,7 @@ where
't: 'i,
T: TokenTree<'t>,
{
read(tree, Reverse::default())
with_reader_unchecked(tree, Reverse::default())
}

pub fn starting<'i, 't, T>(
Expand All @@ -369,17 +285,39 @@ where
filter_tree_tail(reverse(tree))
}

// TODO: There are some interesting tradeoffs regarding the above implementation of `starting` and
// `ending`.
// NOTE: Tokens discarded by `reader` are **not** present in the feed of the output.
fn with_reader_unchecked<'i, 't, T, E>(
tree: &'i T,
reader: E,
) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, T::Annotation>>
where
't: 'i,
T: TokenTree<'t>,
E: 'i + Enqueue,
{
Walk {
branch: None,
tokens: Some(TokenEntry {
position: Position::default(),
token: tree.as_token(),
})
.into_iter()
.collect(),
reader,
}
}

// This function is used to implement `starting` and `ending`. There are some interesting
// considerations between this implementation via `HierarchicalIterator` and an implementation via
// `Enqueue`, which can accomplish the same.
//
// Implementing these via `HierarchicalIterator` maintains a `SeparatingFilter`
// implementation over the entire token tree with no caveats. However, this approach must
// examine every token beneath a branch and its position.
// This function maintains a `SeparatingFilter` implementation over the entire token tree with no
// caveats: all tokens appear in the feed. However, this approach must examine every token and
// associated `Position`.
//
// Implementing these via `Enqueue` is perhaps more straightforward and efficient. Tokens are
// manipulated in batches, and so runs of tokens are trivially discarded without examination.
// On the other hand, this filtering bypasses `SepartingFilter` and so discarded tokens
// cannot be observed.
// `Enqueue` is perhaps more straightforward and efficient. Tokens are manipulated in batches, and
// so runs of tokens are trivially discarded with little or no examination. On the other hand, such
// filtering bypasses `SepartingFilter`, and so discarded tokens cannot be observed in the feed.
fn filter_tree_tail<'i, 't, A>(
tokens: impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, A>>,
) -> impl 'i + HierarchicalIterator<Feed = TokenFeed<'i, 't, A>>
Expand Down
4 changes: 2 additions & 2 deletions src/walk/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ impl WalkProgram {
// matches a glob against the subtrees in a path and there is arguably an implicit
// empty tree. This is also more composable when partitioning and (re)building paths.
//
// The result is that matching the empty glob `` against the path `foo` yields `foo`
// and only `foo` (assuming that the path exists).
// The result is that matching an empty glob against the path `foo` yields `foo` and
// only `foo` (assuming that the path exists).
components: if glob.is_empty() {
vec![]
}
Expand Down

0 comments on commit 7223de9

Please sign in to comment.