diff --git a/grammars/benches/json.rs b/grammars/benches/json.rs index 120774a9..5209c515 100644 --- a/grammars/benches/json.rs +++ b/grammars/benches/json.rs @@ -65,7 +65,7 @@ fn line_col_benchmark(c: &mut Criterion) { let last_pair = pairs.last().unwrap(); c.bench_function("line col", |b| { b.iter(|| { - let _ = last_pair.as_span().start_pos().line_col(); + last_pair.line_col(); }); }); } diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 6d310272..d268eadd 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -12,6 +12,7 @@ use alloc::vec::Vec; use core::fmt; use super::pair::{self, Pair}; +use super::pairs::{Cursor, CursorPairs}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; use crate::RuleType; @@ -28,6 +29,7 @@ pub struct FlatPairs<'i, R> { input: &'i str, start: usize, end: usize, + cursor: Cursor, } /// # Safety @@ -44,6 +46,7 @@ pub unsafe fn new( input, start, end, + cursor: Cursor::default(), } } @@ -107,7 +110,15 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { return None; } - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }; + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + self.start, + self.cursor.clone(), + ) + }; + self.move_cursor(pair.as_str()); self.next_start(); @@ -123,7 +134,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { self.next_start_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + self.end, + self.cursor.clone(), + ) + }; Some(pair) } @@ -144,10 +162,21 @@ impl<'i, R: Clone> Clone for FlatPairs<'i, R> { input: self.input, start: self.start, end: self.end, + cursor: self.cursor.clone(), } } } +impl<'i, R: RuleType> CursorPairs for FlatPairs<'i, R> { + fn cursor(&self) -> Cursor { + self.cursor.clone() + } + + fn cursor_mut(&mut self) -> &mut Cursor { + &mut self.cursor + } +} + #[cfg(test)] mod tests { use super::super::super::macros::tests::*; diff --git a/pest/src/iterators/pair.rs b/pest/src/iterators/pair.rs index 62c95e03..557794e1 100644 --- a/pest/src/iterators/pair.rs +++ b/pest/src/iterators/pair.rs @@ -20,7 +20,7 @@ use core::str; #[cfg(feature = "pretty-print")] use serde::ser::SerializeStruct; -use super::pairs::{self, Pairs}; +use super::pairs::{self, Cursor, Pairs}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; use crate::span::{self, Span}; @@ -43,6 +43,9 @@ pub struct Pair<'i, R> { input: &'i str, /// Token index into `queue`. start: usize, + + line: usize, + col: usize, } /// # Safety @@ -52,11 +55,14 @@ pub unsafe fn new( queue: Rc>>, input: &str, start: usize, + cursor: Cursor, ) -> Pair<'_, R> { Pair { queue, input, start, + line: cursor.line, + col: cursor.col, } } @@ -241,6 +247,11 @@ impl<'i, R: RuleType> Pair<'i, R> { ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.") } + /// Returns the `line`, `col` of this pair start. + pub fn line_col(&self) -> (usize, usize) { + (self.line, self.col) + } + fn pair(&self) -> usize { match self.queue[self.start] { QueueableToken::Start { diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index e478cebf..30eec02e 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -25,6 +25,82 @@ use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; use crate::RuleType; +#[derive(Clone)] +pub struct Cursor { + pub line: usize, + pub col: usize, +} + +impl Default for Cursor { + fn default() -> Cursor { + Cursor { line: 1, col: 1 } + } +} + +impl Cursor { + fn get(&self) -> (usize, usize) { + (self.line, self.col) + } +} + +pub trait CursorPairs { + fn cursor(&self) -> Cursor; + fn cursor_mut(&mut self) -> &mut Cursor; + + /// Move the (line, col) with string part + fn move_cursor(&mut self, part: &str) -> (usize, usize) { + let (l, c, has_new_line) = self.line_col(part); + + let (prev_line, prev_col) = self.cursor().get(); + + self.cursor_mut().line += l; + if has_new_line { + self.cursor_mut().col = c; + } else { + self.cursor_mut().col += c; + } + (prev_line, prev_col) + } + + /// Calculate line and col number of a string part + /// Fork from Pest for just count the part. + /// + /// https://github.com/pest-parser/pest/blob/85b18aae23cc7b266c0b5252f9f74b7ab0000795/pest/src/position.rs#L135 + fn line_col(&self, part: &str) -> (usize, usize, bool) { + let mut chars = part.chars().peekable(); + + let mut line_col = (0, 0); + let mut has_new_line = false; + + loop { + match chars.next() { + Some('\r') => { + if let Some(&'\n') = chars.peek() { + chars.next(); + + line_col = (line_col.0 + 1, 1); + has_new_line = true; + } else { + line_col = (line_col.0, line_col.1 + 1); + } + } + Some('\n') => { + line_col = (line_col.0 + 1, 1); + has_new_line = true; + } + Some(_c) => { + line_col = (line_col.0, line_col.1 + 1); + } + None => { + break; + } + } + } + + (line_col.0, line_col.1, has_new_line) + } +} + /// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`]. /// /// [`Pair`]: struct.Pair.html @@ -36,6 +112,7 @@ pub struct Pairs<'i, R> { input: &'i str, start: usize, end: usize, + cursor: Cursor, } pub fn new( @@ -49,6 +126,7 @@ pub fn new( input, start, end, + cursor: Cursor::default(), } } @@ -181,7 +259,14 @@ impl<'i, R: RuleType> Pairs<'i, R> { #[inline] pub fn peek(&self) -> Option> { if self.start < self.end { - Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }) + Some(unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + self.start, + self.cursor.clone(), + ) + }) } else { None } @@ -221,11 +306,22 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } +impl<'i, R: RuleType> CursorPairs for Pairs<'i, R> { + fn cursor(&self) -> Cursor { + self.cursor.clone() + } + + fn cursor_mut(&mut self) -> &mut Cursor { + &mut self.cursor + } +} + impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; fn next(&mut self) -> Option { let pair = self.peek()?; + self.move_cursor(pair.as_str()); self.start = self.pair() + 1; Some(pair) } @@ -239,7 +335,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { self.end = self.pair_from_end(); - let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) }; + let pair = unsafe { + pair::new( + Rc::clone(&self.queue), + self.input, + self.end, + self.cursor.clone(), + ) + }; Some(pair) } @@ -423,4 +526,18 @@ mod tests { vec![Rule::c, Rule::a] ); } + + #[test] + fn test_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abcde\nabcde").unwrap(); + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + assert_eq!(pairs.cursor.get(), (1, 4)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (1, 4)); + assert_eq!(pairs.cursor.get(), (1, 5)); + } } diff --git a/pest/src/lib.rs b/pest/src/lib.rs index 5cf2f9e2..fa4df200 100644 --- a/pest/src/lib.rs +++ b/pest/src/lib.rs @@ -360,6 +360,7 @@ pub mod prec_climber; mod span; mod stack; mod token; + #[doc(hidden)] pub mod unicode;