Refactor token tree iteration.
This change introduces a `token::walk` module and refactors token
iteration. `Walk` is no longer exported and instead filtered iterators
are exposed via module functions (to avoid combining such adapters or
mutating and then adapting iterators).

`Walk` is now a `HierarchicalIterator` and supports filter composition.
To support this, the `walk::filter` module has been moved to the crate
root and is no longer feature gated.
olson-sean-k committed Jan 19, 2024
1 parent c140681 commit 3c95ff0
Showing 8 changed files with 368 additions and 175 deletions.
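For illustration, here is a minimal sketch of the shape this change moves toward: rather than exporting the `Walk` adapter and chaining methods on it (`token.walk().starting()`), the `walk` module keeps the iterator private and exposes already-filtered iterators as free functions (`walk::starting(token)`), as the updated call sites below show. The `Token` type and the filter bodies in this sketch are hypothetical stand-ins, not the crate's real types.

struct Token(&'static str);

mod walk {
    use super::Token;

    // The concrete iterator never leaves this module.
    struct Walk<'i> {
        tokens: std::slice::Iter<'i, Token>,
    }

    impl<'i> Iterator for Walk<'i> {
        type Item = &'i Token;

        fn next(&mut self) -> Option<Self::Item> {
            self.tokens.next()
        }
    }

    // Callers only ever see `impl Iterator`, so the adapter cannot be
    // combined with other adapters or applied to an iterator that has
    // already been mutated outside this module.
    pub fn forward(tokens: &[Token]) -> impl Iterator<Item = &Token> {
        Walk {
            tokens: tokens.iter(),
        }
    }

    pub fn starting(tokens: &[Token]) -> impl Iterator<Item = &Token> {
        forward(tokens).take(1) // Placeholder for the real `starting` filter.
    }
}

fn main() {
    let tokens = [Token("a"), Token("b")];
    for Token(text) in walk::starting(&tokens) {
        println!("{text}");
    }
}

The real functions in this commit (`walk::forward`, `walk::starting`, and `walk::ending`) traverse the crate's token trees rather than a slice; the point of the sketch is only the encapsulation of the adapter type.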
src/walk/filter.rs → src/filter.rs: 1 addition & 1 deletion
@@ -440,7 +440,7 @@ impl<'i, I> WalkCancellation<'i, I> {
//
// RPITIT is slated to land at the end of December of 2023. Remove this and implement
// iterators using pure combinators when that happens.
-pub(in crate::walk) fn unchecked(tree: &'i mut I) -> Self {
+pub(crate) fn unchecked(tree: &'i mut I) -> Self {
WalkCancellation(tree)
}
}
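The RPITIT comment in the hunk above refers to return-position `impl Trait` in traits (RPITIT), which stabilized in Rust 1.75 at the end of December 2023. A minimal sketch of what it permits, using hypothetical types unrelated to this crate:

// With RPITIT, a trait method can return `impl Iterator` directly, which is
// what makes the "pure combinators" approach anticipated by that comment viable.
trait Children {
    fn children(&self) -> impl Iterator<Item = &Self>;
}

struct Node {
    children: Vec<Node>,
}

impl Children for Node {
    fn children(&self) -> impl Iterator<Item = &Self> {
        self.children.iter()
    }
}

fn main() {
    let root = Node {
        children: vec![Node { children: vec![] }, Node { children: vec![] }],
    };
    println!("{}", root.children().count()); // Prints 2.
}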
src/lib.rs: 1 addition & 0 deletions
@@ -33,6 +33,7 @@
mod capture;
mod diagnostics;
mod encode;
+mod filter;
mod rule;
mod token;
pub mod walk;
src/rule.rs: 5 additions & 17 deletions
@@ -24,7 +24,7 @@ use std::slice;
use thiserror::Error;

use crate::diagnostics::{CompositeSpan, CorrelatedSpan, SpanExt as _};
-use crate::token::{self, Cardinality, Size, Token, TokenKind, TokenTree, Tokenized};
+use crate::token::{self, walk, Cardinality, Size, Token, TokenKind, TokenTree, Tokenized};

Check failure on line 27 in src/rule.rs (GitHub Actions / Lint): unresolved import `crate::token::TokenKind`
use crate::{Any, BuildError, Glob, Pattern};

/// Maximum invariant size.
@@ -435,37 +435,25 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> {

fn has_starting_component_boundary<'t>(token: Option<&'t Token<'t>>) -> bool {
token.map_or(false, |token| {
-token
-.walk()
-.starting()
-.any(|(_, token)| token.is_component_boundary())
+walk::starting(token).any(|(_, token)| token.is_component_boundary())
})
}

fn has_ending_component_boundary<'t>(token: Option<&'t Token<'t>>) -> bool {
token.map_or(false, |token| {
-token
-.walk()
-.ending()
-.any(|(_, token)| token.is_component_boundary())
+walk::ending(token).any(|(_, token)| token.is_component_boundary())
})
}

fn has_starting_zom_token<'t>(token: Option<&'t Token<'t>>) -> bool {
token.map_or(false, |token| {
-token
-.walk()
-.starting()
-.any(|(_, token)| matches!(token.kind(), Wildcard(ZeroOrMore(_))))
+walk::starting(token).any(|(_, token)| matches!(token.kind(), Wildcard(ZeroOrMore(_))))
})
}

fn has_ending_zom_token<'t>(token: Option<&'t Token<'t>>) -> bool {
token.map_or(false, |token| {
-token
-.walk()
-.ending()
-.any(|(_, token)| matches!(token.kind(), Wildcard(ZeroOrMore(_))))
+walk::ending(token).any(|(_, token)| matches!(token.kind(), Wildcard(ZeroOrMore(_))))
})
}

src/token/mod.rs: 11 additions & 149 deletions
@@ -1,5 +1,6 @@
mod parse;
mod variance;
+pub mod walk;

use itertools::Itertools as _;
use std::borrow::Cow;
@@ -588,10 +589,6 @@ impl<'t, A> Token<'t, A> {
path.fold()
}

-pub fn walk(&self) -> Walk<'_, 't, A> {
-Walk::from(self)
-}
-
pub fn tokens(&self) -> Option<Composition<&[Self]>> {
self.as_branch().map(BranchKind::tokens)
}
@@ -637,8 +634,8 @@ impl<'t, A> Token<'t, A> {
}

pub fn composition(&self) -> Composition<()> {
-self.tokens()
-.map_or(Composition::Conjuntive(()), |tokens| tokens.map(|_| ()))
+self.as_branch()
+.map_or(Composition::Conjuntive(()), |branch| branch.composition())
}

pub fn topology(&self) -> &TokenTopology<'t, A> {
@@ -707,8 +704,10 @@ impl<'t, A> Token<'t, A> {
// TODO: This query is a bit odd. It returns `true` for alternations (even when not all
// branches are rooted) and repetitions (even with a lower bound of zero). Either way,
// this should probably return `When`, not `bool`.
+//
+// Implement this via `fold`.
pub fn has_root(&self) -> bool {
-self.walk().starting().any(|(_, token)| {
+walk::starting(self).any(|(_, token)| {
token.as_leaf().map_or(false, |leaf| {
matches!(
leaf,
@@ -719,7 +718,7 @@ impl<'t, A> Token<'t, A> {
}

pub fn has_boundary(&self) -> bool {
-self.walk().any(|(_, token)| token.boundary().is_some())
+walk::forward(self).any(|(_, token)| token.boundary().is_some())
}

pub fn is_capturing(&self) -> bool {
@@ -982,6 +981,10 @@ impl<'t, A> BranchKind<'t, A> {
}
}

+pub fn composition(&self) -> Composition<()> {
+self.tokens().map(|_| ())
+}
+
pub fn is_capturing(&self) -> bool {
matches!(self, BranchKind::Alternation(_) | BranchKind::Repetition(_))
}
@@ -1665,147 +1668,6 @@ impl VarianceTerm<Text<'static>> for Wildcard {
}
}

-#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub enum Position {
-Conjunctive { depth: usize },
-Disjunctive { depth: usize, branch: usize },
-}
-
-impl Position {
-pub fn depth(&self) -> usize {
-match self {
-Position::Conjunctive { ref depth } | Position::Disjunctive { ref depth, .. } => *depth,
-}
-}
-
-// This may appear to operate in place.
-#[must_use]
-fn converge(self) -> Self {
-match self {
-Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
-Position::Conjunctive { depth: depth + 1 }
-},
-}
-}
-
-// This may appear to operate in place.
-#[must_use]
-fn diverge(self, branch: usize) -> Self {
-match self {
-Position::Conjunctive { depth } | Position::Disjunctive { depth, .. } => {
-Position::Disjunctive {
-depth: depth + 1,
-branch,
-}
-},
-}
-}
-}
-
-impl Default for Position {
-fn default() -> Self {
-Position::Conjunctive { depth: 0 }
-}
-}
-
-// TODO: Move this iterator and its associated types into a `walk` module.
-// TODO: Consider implementing `HierarchicalIterator`. Note that filters like `starting` and
-// `ending` read the entire tree. The `rule` module could also leverage this. For example,
-// `rule::group` is only interested in non-concatenation branch tokens, which is probably a
-// good case for `HierarchicalIterator::filter_map_tree`.
-#[derive(Clone, Debug)]
-pub struct Walk<'i, 't, A> {
-buffer: VecDeque<(Position, &'i Token<'t, A>)>,
-}
-
-impl<'i, 't, A> Walk<'i, 't, A>
-where
-'t: 'i,
-A: 't,
-{
-pub fn starting(self) -> impl 'i + Iterator<Item = (Position, &'i Token<'t, A>)> {
-self.peekable().batching(|tokens| {
-if let Some((position, token)) = tokens.next() {
-tokens
-.peeking_take_while(|(next, _)| *next == position)
-.for_each(drop);
-Some((position, token))
-}
-else {
-None
-}
-})
-}
-
-pub fn ending(self) -> impl 'i + Iterator<Item = (Position, &'i Token<'t, A>)> {
-self.peekable().batching(|tokens| {
-if let Some((position, _)) = tokens.peek().copied() {
-tokens
-.peeking_take_while(|(next, _)| *next == position)
-.last()
-}
-else {
-None
-}
-})
-}
-}
-
-impl<'i, 't, A> From<&'i Token<'t, A>> for Walk<'i, 't, A> {
-fn from(token: &'i Token<'t, A>) -> Self {
-Walk {
-buffer: Some((Position::default(), token)).into_iter().collect(),
-}
-}
-}
-
-impl<'i, 't, A> From<&'i Vec<Token<'t, A>>> for Walk<'i, 't, A> {
-fn from(tokens: &'i Vec<Token<'t, A>>) -> Self {
-Walk {
-buffer: tokens
-.iter()
-.map(|token| (Position::default(), token))
-.collect(),
-}
-}
-}
-
-impl<'i, 't, A> Iterator for Walk<'i, 't, A>
-where
-'t: 'i,
-A: 't,
-{
-type Item = (Position, &'i Token<'t, A>);
-
-fn next(&mut self) -> Option<Self::Item> {
-if let Some((position, token)) = self.buffer.pop_front() {
-match token.tokens() {
-Some(Composition::Conjunctive(tokens)) => self
-.buffer
-.extend(tokens.iter().map(|token| (position.converge(), token))),
-// TODO: Previously, this pushed a divergent position **for each token in an
-// implicitly concatenated branch**. This code does this for each token
-// within an alternation, but these are (almost?) always themselves
-// concatenations now. The tokens within such a concatenation will be
-// conjunctive. Some code may break due to bad assumptions here.
-Some(Composition::Disjunctive(tokens)) => self.buffer.extend(
-tokens
-.iter()
-.enumerate()
-.map(|(branch, token)| (position.diverge(branch), token)),
-),
-_ => {},
-};
-Some((position, token))
-}
-else {
-None
-}
-}
-}

// TODO: Move component types above `Token` like other similarly used types.

#[derive(Clone, Debug)]
pub struct LiteralSequence<'i, 't>(Vec<&'i Literal<'t>>);

src/token/variance/mod.rs: 2 additions & 4 deletions
@@ -1,13 +1,11 @@
pub mod invariant;

use itertools::Itertools as _;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::marker::PhantomData;
use std::ops::Add;

-use crate::token::variance::invariant::{Invariant, Text};
-use crate::token::{self, BranchKind, Fold, LeafKind, Separator, Token};
+use crate::token::variance::invariant::Invariant;
+use crate::token::{BranchKind, Fold, LeafKind};

pub trait VarianceTerm<T> {
fn term(&self) -> Variance<T>;
