diff --git a/grammars/benches/json.rs b/grammars/benches/json.rs index 120774a9..7e5fbb99 100644 --- a/grammars/benches/json.rs +++ b/grammars/benches/json.rs @@ -56,16 +56,37 @@ item = _{ SOI ~ line* ~ EOI } pub struct JsonParser; } +// With 500 iterations +// pair.line_col time: [2.9937 µs 2.9975 µs 3.0018 µs] +// position.line_col time: [212.59 µs 213.38 µs 214.29 µs] +// position.line_col (with fast-line-col) time: [18.241 µs 18.382 µs 18.655 µs] +// +// With 1000 iterations +// pair.line_col time: [10.814 µs 10.846 µs 10.893 µs] +// position.line_col time: [90.135 µs 93.901 µs 98.655 µs] +// position.line_col (with fast-line-col) time: [1.7199 ms 1.7246 ms 1.7315 ms] fn line_col_benchmark(c: &mut Criterion) { let mut file = File::open("benches/main.i18n.json").unwrap(); let mut data = String::new(); file.read_to_string(&mut data).unwrap(); let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap(); - let last_pair = pairs.last().unwrap(); - c.bench_function("line col", |b| { + + c.bench_function("pair.line_col", |b| { + b.iter(|| { + let mut pairs = pairs.clone(); + for _ in 0..500 { + pairs.next().unwrap().line_col(); + } + }) + }); + + c.bench_function("position.line_col", |b| { b.iter(|| { - let _ = last_pair.as_span().start_pos().line_col(); + let mut pairs = pairs.clone(); + for _ in 0..500 { + pairs.next().unwrap().as_span().start_pos().line_col(); + } }); }); } diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 6d310272..411d88b2 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -108,7 +108,6 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { } let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) }; - self.next_start(); Some(pair) @@ -177,4 +176,24 @@ mod tests { vec![Rule::c, Rule::b, Rule::a] ); } + + #[test] + fn test_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abcNe\nabcde").unwrap().flatten(); + + let pair 
= pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "b"); + assert_eq!(pair.line_col(), (1, 2)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (1, 5)); + assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); + } } diff --git a/pest/src/iterators/pair.rs b/pest/src/iterators/pair.rs index 62c95e03..2c813478 100644 --- a/pest/src/iterators/pair.rs +++ b/pest/src/iterators/pair.rs @@ -43,6 +43,7 @@ pub struct Pair<'i, R> { input: &'i str, /// Token index into `queue`. start: usize, + pub(crate) line_col: Option<(usize, usize)>, } /// # Safety @@ -57,6 +58,7 @@ pub unsafe fn new( queue, input, start, + line_col: None, } } @@ -241,6 +243,14 @@ impl<'i, R: RuleType> Pair<'i, R> { ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.") } + /// Returns the `(line, col)` at which this pair starts. 
+ pub fn line_col(&self) -> (usize, usize) { + match &self.line_col { + Some(line_col) => (line_col.0, line_col.1), + None => self.as_span().start_pos().line_col(), + } + } + fn pair(&self) -> usize { match self.queue[self.start] { QueueableToken::Start { diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index e478cebf..a43dc528 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -23,8 +23,24 @@ use super::flat_pairs::{self, FlatPairs}; use super::pair::{self, Pair}; use super::queueable_token::QueueableToken; use super::tokens::{self, Tokens}; -use crate::RuleType; +use crate::{position, RuleType}; +#[derive(Clone)] +pub struct Cursor { + pub line: usize, + pub col: usize, + pub end: usize, +} + +impl Default for Cursor { + fn default() -> Cursor { + Cursor { + line: 1, + col: 1, + end: 0, + } + } +} /// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`]. /// /// [`Pair`]: struct.Pair.html @@ -36,6 +52,7 @@ pub struct Pairs<'i, R> { input: &'i str, start: usize, end: usize, + cursor: Cursor, } pub fn new( @@ -49,6 +66,7 @@ pub fn new( input, start, end, + cursor: Cursor::default(), } } @@ -219,13 +237,50 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } } + + /// Move the cursor (line, col) by a part of the input. 
+ fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) { + // Advance the cursor past any characters skipped before `start` (e.g. by `skip(n)`) + let prev_end = self.cursor.end; + if prev_end != start { + self.move_cursor(input, prev_end, start); + } + + let (prev_line, prev_col) = (self.cursor.line, self.cursor.col); + + let part = &input[self.cursor.end..end]; + let (l, c) = position::line_col(part, part.len()); + + // `position::line_col` returns a 1-based (line, col), so subtract 1 from each + let l = l - 1; + let mut c = c - 1; + if c < 1 { + c = 1 + } + + self.cursor.line += l; + // A newline was crossed, so the column restarts instead of accumulating + if l > 0 { + self.cursor.col = c; + } else { + self.cursor.col += c; + } + self.cursor.end = end; + + (prev_line, prev_col) + } } impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; fn next(&mut self) -> Option { - let pair = self.peek()?; + let mut pair = self.peek()?; + let span = pair.as_span(); + + let (l, c) = self.move_cursor(self.input, span.start(), span.end()); + pair.line_col = Some((l, c)); + self.start = self.pair() + 1; Some(pair) } @@ -423,4 +478,48 @@ mod tests { vec![Rule::c, Rule::a] ); } + + #[test] + fn test_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + assert_eq!( + (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (1, 4, 3) + ); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (2, 1)); + assert_eq!( + (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (2, 2, 5) + ); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + assert_eq!( + (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end), + (2, 5, 8) + ); + } + + #[test] + fn test_rev_iter_line_col() { + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + let pair = 
pairs.next().unwrap(); + assert_eq!(pair.as_str(), "fgh"); + assert_eq!(pair.line_col(), (2, 2)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "e"); + assert_eq!(pair.line_col(), (2, 1)); + + let pair = pairs.next().unwrap(); + assert_eq!(pair.as_str(), "abc"); + assert_eq!(pair.line_col(), (1, 1)); + } } diff --git a/pest/src/lib.rs b/pest/src/lib.rs index 5cf2f9e2..fa4df200 100644 --- a/pest/src/lib.rs +++ b/pest/src/lib.rs @@ -360,6 +360,7 @@ pub mod prec_climber; mod span; mod stack; mod token; + #[doc(hidden)] pub mod unicode; diff --git a/pest/src/macros.rs b/pest/src/macros.rs index 0ca6d9e0..1b83f388 100644 --- a/pest/src/macros.rs +++ b/pest/src/macros.rs @@ -329,6 +329,7 @@ pub mod tests { a, b, c, + d, } pub struct AbcParser; @@ -345,6 +346,7 @@ pub mod tests { .skip(1) }) .and_then(|s| s.skip(1).unwrap().rule(Rule::c, |s| s.match_string("e"))) + .and_then(|s| s.optional(|s| s.rule(Rule::d, |s| s.match_string("fgh")))) }) } } diff --git a/pest/src/position.rs b/pest/src/position.rs index f91f8291..559cf446 100644 --- a/pest/src/position.rs +++ b/pest/src/position.rs @@ -116,6 +116,9 @@ impl<'i> Position<'i> { /// Returns the line and column number of this `Position`. /// + /// This is an O(n) operation, where n is the number of chars in the input. + /// Prefer [`pair.line_col()`](struct.Pair.html#method.line_col) instead. + /// /// # Examples /// /// ``` @@ -135,14 +138,8 @@ impl<'i> Position<'i> { if self.pos > self.input.len() { panic!("position out of bounds"); } - #[cfg(feature = "fast-line-col")] - { - fast_line_col(self.input, self.pos) - } - #[cfg(not(feature = "fast-line-col"))] - { - original_line_col(self.input, self.pos) - } + + line_col(self.input, self.pos) } /// Returns the entire line of the input that contains this `Position`. 
@@ -455,6 +452,17 @@ impl<'i> Hash for Position<'i> { } } +pub(crate) fn line_col(input: &str, pos: usize) -> (usize, usize) { + #[cfg(feature = "fast-line-col")] + { + fast_line_col(input, pos) + } + #[cfg(not(feature = "fast-line-col"))] + { + original_line_col(input, pos) + } +} + #[inline] #[cfg(not(feature = "fast-line-col"))] fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) {