Skip to content

Commit

Permalink
Consider adjacent tokens when constructing a HIR.
Browse files: browse the repository at this point in the history
  • Loading branch information
olson-sean-k committed Mar 30, 2024
1 parent 9103b60 commit 0620b10
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 17 deletions.
34 changes: 20 additions & 14 deletions src/hir.rs
Expand Up @@ -128,11 +128,12 @@ where
term
}

fn term(&mut self, _: impl FoldPosition<'t, A>, leaf: &LeafKind<'t>) -> Self::Term {
fn term(&mut self, position: impl FoldPosition<'t, A>, leaf: &LeafKind<'t>) -> Self::Term {
use token::Wildcard::{One, Tree, ZeroOrMore};
use Archetype::{Character, Range};
use LeafKind::{Class, Literal, Separator, Wildcard};

let adjacency = position.adjacency();
match leaf {
Class(ref class) => {
let is_negated = class.is_negated();
Expand Down Expand Up @@ -161,11 +162,17 @@ where
Hir::literal(literal.text().as_bytes())
}
},
// TODO: Separators should probably also match the end of text when they are at the
// end of a glob expression. This may not be possible in a fold with simple
// terms though, since that positional information isn't available until
// reaching the root of the token tree.
Separator(_) => self::separator().into_hir(),
Separator(_) => {
if adjacency.right.is_some() {
self::separator().into_hir()
}
else {
Hir::alternation(vec![
self::separator().into_hir(),
Hir::look(hir::Look::End),
])
}
},
Wildcard(ref wildcard) => match wildcard {
One => Hir::class(hir::Class::Unicode(self::not_separator())),
Tree { has_root } => Hir::alternation(vec![
Expand All @@ -190,12 +197,13 @@ where
self::separator().into_hir(),
Hir::empty(),
]),
// TODO: Zero or more wildcards should match **one** or more if they comprise
// the entirety of a component, such as in `a/*/b`. This may not be
// possible in a fold with simple terms though, since adjacency
// information isn't available until reaching the root of the token tree.
ZeroOrMore(ref evaluation) => Hir::repetition(hir::Repetition {
min: 0,
min: if adjacency.is_open() || adjacency.is_closed_boundary() {
1
}
else {
0
},
max: None,
greedy: evaluation.is_eager(),
sub: Box::new(self::not_separator().into_hir()),
Expand All @@ -215,9 +223,7 @@ where
.iter()
.adjacent()
.map(|token| {
let hir = token
.fold_with_adjacent(Compile::default())
.unwrap_or_else(Hir::empty);
let hir = token.fold_with_adjacent(Compile).unwrap_or_else(Hir::empty);
if token.into_item().is_capturing() {
let index = capture_group_index;
capture_group_index = capture_group_index
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Expand Up @@ -268,6 +268,10 @@ pub trait SliceProjection: Index<usize, Output = Self::Item> {
fn get(&self, index: usize) -> Option<&Self::Item>;

fn len(&self) -> usize;

/// Returns `true` when `len()` reports zero items.
fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
}
}

impl<T> SliceProjection for [T] {
Expand Down
20 changes: 17 additions & 3 deletions src/token/walk.rs
Expand Up @@ -38,7 +38,7 @@ impl<'i, 't, A> ParentToken<'i, 't, A> for &'i BranchKind<'t, A> {
type Child = &'i Token<'t, A>;

fn as_ref(&self) -> &BranchKind<'t, A> {
*self
self
}

fn into_tokens(self) -> impl DoubleEndedIterator<Item = Self::Child> {
Expand Down Expand Up @@ -67,8 +67,8 @@ impl<'t, A> ChildToken<'t, A> for Token<'t, A> {

#[derive(Debug)]
pub struct Adjacency<'i, 't, A> {
left: Option<&'i Token<'t, A>>,
right: Option<&'i Token<'t, A>>,
pub left: Option<&'i Token<'t, A>>,
pub right: Option<&'i Token<'t, A>>,
}

impl<'i, 't, A> Adjacency<'i, 't, A> {
Expand All @@ -80,6 +80,20 @@ impl<'i, 't, A> Adjacency<'i, 't, A> {
right: right.or(self.right),
}
}

/// Returns `true` if there is no adjacent token on either side, i.e. both `left` and `right`
/// are `None`.
pub fn is_open(&self) -> bool {
    // De Morgan form of "left is absent and right is absent".
    !(self.left.is_some() || self.right.is_some())
}

/// Returns `true` if there is an adjacent token on both sides, i.e. both `left` and `right`
/// are `Some`.
pub fn is_closed(&self) -> bool {
    matches!((self.left, self.right), (Some(_), Some(_)))
}

pub fn is_closed_boundary(&self) -> bool {
let is_boundary =
|token: Option<&Token<'_, _>>| token.map_or(false, |token| token.boundary().is_some());
is_boundary(self.left) && is_boundary(self.right)
}
}

impl<'i, 't, A> Clone for Adjacency<'i, 't, A> {
Expand Down

0 comments on commit 0620b10

Please sign in to comment.