Skip to content

Commit

Permalink
Improve line, col calculate performance by use move cursor on Pairs I…
Browse files Browse the repository at this point in the history
…terator. (#754)

* Improve line/col calculation performance by moving a cursor along the Pairs iterator.

ref: #707, #560

* Add benchmark for pair.line_col vs position.line_col

* Fix flat_pairs and pairs.next_back to use position.line_col

* Merge line_col method to use `position::line_col`.

* Fix `pair.line_col` to support skipped characters, and add a test for the reversed iterator.
  • Loading branch information
huacnlee committed Dec 29, 2022
1 parent 024b857 commit 79746cf
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 14 deletions.
27 changes: 24 additions & 3 deletions grammars/benches/json.rs
Expand Up @@ -56,16 +56,37 @@ item = _{ SOI ~ line* ~ EOI }
pub struct JsonParser;
}

// With 500 times iter
// pair.line_col time: [2.9937 µs 2.9975 µs 3.0018 µs]
// position.line_col time: [212.59 µs 213.38 µs 214.29 µs]
// position.line_col (with fast-line-col) time: [18.241 µs 18.382 µs 18.655 µs]
//
// With 1000 times iter
// pair.line_col time: [10.814 µs 10.846 µs 10.893 µs]
// position.line_col time: [90.135 µs 93.901 µs 98.655 µs]
// position.line_col (with fast-line-col) time: [1.7199 ms 1.7246 ms 1.7315 ms]
fn line_col_benchmark(c: &mut Criterion) {
let mut file = File::open("benches/main.i18n.json").unwrap();
let mut data = String::new();

file.read_to_string(&mut data).unwrap();
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();
let last_pair = pairs.last().unwrap();
c.bench_function("line col", |b| {

c.bench_function("pair.line_col", |b| {
b.iter(|| {
let mut pairs = pairs.clone();
for _ in 0..500 {
pairs.next().unwrap().line_col();
}
})
});

c.bench_function("position.line_col", |b| {
b.iter(|| {
let _ = last_pair.as_span().start_pos().line_col();
let mut pairs = pairs.clone();
for _ in 0..500 {
pairs.next().unwrap().as_span().start_pos().line_col();
}
});
});
}
Expand Down
21 changes: 20 additions & 1 deletion pest/src/iterators/flat_pairs.rs
Expand Up @@ -108,7 +108,6 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
}

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };

self.next_start();

Some(pair)
Expand Down Expand Up @@ -177,4 +176,24 @@ mod tests {
vec![Rule::c, Rule::b, Rule::a]
);
}

#[test]
fn test_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abcNe\nabcde").unwrap().flatten();

    // (matched text, expected 1-based (line, col) of the pair start), in flattened order.
    let expected = [("abc", (1, 1)), ("b", (1, 2)), ("e", (1, 5))];

    for &(text, line_col) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), line_col);
        // The pair-level lookup must agree with the position-based computation.
        assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col());
    }
}
}
10 changes: 10 additions & 0 deletions pest/src/iterators/pair.rs
Expand Up @@ -43,6 +43,7 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,
pub(crate) line_col: Option<(usize, usize)>,
}

/// # Safety
Expand All @@ -57,6 +58,7 @@ pub unsafe fn new<R: RuleType>(
queue,
input,
start,
line_col: None,
}
}

Expand Down Expand Up @@ -241,6 +243,14 @@ impl<'i, R: RuleType> Pair<'i, R> {
::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
}

/// Returns the `(line, col)` at which this pair starts.
///
/// Uses the value stored in `self.line_col` when one is present; otherwise
/// falls back to computing it from the start position of the pair's span.
pub fn line_col(&self) -> (usize, usize) {
    self.line_col
        .unwrap_or_else(|| self.as_span().start_pos().line_col())
}

fn pair(&self) -> usize {
match self.queue[self.start] {
QueueableToken::Start {
Expand Down
103 changes: 101 additions & 2 deletions pest/src/iterators/pairs.rs
Expand Up @@ -23,8 +23,24 @@ use super::flat_pairs::{self, FlatPairs};
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::RuleType;
use crate::{position, RuleType};

/// Running text position kept by [`Pairs`] while it is iterated forwards.
#[derive(Clone)]
pub struct Cursor {
    /// Line of the cursor, counted from 1.
    pub line: usize,
    /// Column of the cursor, counted from 1.
    pub col: usize,
    /// Byte offset into the input up to which the cursor has been advanced.
    pub end: usize,
}

impl Default for Cursor {
    /// A cursor at the very start of the input: line 1, column 1, offset 0.
    fn default() -> Self {
        Self {
            end: 0,
            line: 1,
            col: 1,
        }
    }
}
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
Expand All @@ -36,6 +52,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
}

pub fn new<R: RuleType>(
Expand All @@ -49,6 +66,7 @@ pub fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
}
}

Expand Down Expand Up @@ -219,13 +237,50 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
}

/// Advances the cursor to byte offset `end` of `input` and returns the
/// `(line, col)` the cursor had at `start`, i.e. where the pair begins.
///
/// `start` and `end` are byte offsets into `input`. If the cursor has not yet
/// reached `start` (characters were consumed without producing a pair, e.g. by
/// `skip(n)`), it is first advanced over that gap.
///
/// # Panics
///
/// Slicing `input` panics if `end` lies before the cursor's current offset or
/// if either offset is not on a character boundary.
fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) {
    // Move the cursor over any skipped characters (by skip(n)) so that the
    // value captured below is the position at `start`.
    let prev_end = self.cursor.end;
    if prev_end != start {
        self.move_cursor(input, prev_end, start);
    }

    let (prev_line, prev_col) = (self.cursor.line, self.cursor.col);

    let part = &input[self.cursor.end..end];
    // `position::line_col` is 1-based and relative to `part`: `l - 1` is the
    // number of line breaks inside `part`, and `c` is the column just past
    // its last character.
    let (l, c) = position::line_col(part, part.len());

    self.cursor.line += l - 1;
    if l > 1 {
        // `part` contains a line break, so the column restarts on the new
        // line and `c` is already the absolute column.
        self.cursor.col = c;
    } else {
        // Same line: advance by the width of `part` (zero for an empty part).
        self.cursor.col += c - 1;
    }
    self.cursor.end = end;

    (prev_line, prev_col)
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;

/// Yields the next pair, tagged with the `(line, col)` at which it starts,
/// or `None` once `peek` reports that nothing is left.
fn next(&mut self) -> Option<Self::Item> {
    // Peek exactly once; the result is the pair handed back to the caller.
    let mut pair = self.peek()?;
    let span = pair.as_span();

    // `move_cursor` returns the cursor position at the span start and leaves
    // the cursor at the span end, ready for the following pair.
    let line_col = self.move_cursor(self.input, span.start(), span.end());
    pair.line_col = Some(line_col);

    self.start = self.pair() + 1;
    Some(pair)
}
Expand Down Expand Up @@ -423,4 +478,48 @@ mod tests {
vec![Rule::c, Rule::a]
);
}

#[test]
fn test_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();

    // (matched text, pair start (line, col), cursor (line, col, end) after yielding it)
    let expected = [
        ("abc", (1, 1), (1, 4, 3)),
        ("e", (2, 1), (2, 2, 5)),
        ("fgh", (2, 2), (2, 5, 8)),
    ];

    for &(text, start, cursor_after) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), start);

        let cursor = &pairs.cursor;
        assert_eq!((cursor.line, cursor.col, cursor.end), cursor_after);
    }
}

#[test]
fn test_rev_iter_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();

    // Pairs come out back to front; each must still report its own start position.
    let expected = [("fgh", (2, 2)), ("e", (2, 1)), ("abc", (1, 1))];

    for &(text, line_col) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), line_col);
    }
}
}
1 change: 1 addition & 0 deletions pest/src/lib.rs
Expand Up @@ -360,6 +360,7 @@ pub mod prec_climber;
mod span;
mod stack;
mod token;

#[doc(hidden)]
pub mod unicode;

Expand Down
2 changes: 2 additions & 0 deletions pest/src/macros.rs
Expand Up @@ -329,6 +329,7 @@ pub mod tests {
a,
b,
c,
d,
}

pub struct AbcParser;
Expand All @@ -345,6 +346,7 @@ pub mod tests {
.skip(1)
})
.and_then(|s| s.skip(1).unwrap().rule(Rule::c, |s| s.match_string("e")))
.and_then(|s| s.optional(|s| s.rule(Rule::d, |s| s.match_string("fgh"))))
})
}
}
Expand Down
24 changes: 16 additions & 8 deletions pest/src/position.rs
Expand Up @@ -116,6 +116,9 @@ impl<'i> Position<'i> {

/// Returns the line and column number of this `Position`.
///
/// This is an O(n) operation, where n is the number of chars in the input.
/// Prefer [`pair.line_col()`](struct.Pair.html#method.line_col) when a `Pair` is available.
///
/// # Examples
///
/// ```
Expand All @@ -135,14 +138,8 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}
#[cfg(feature = "fast-line-col")]
{
fast_line_col(self.input, self.pos)
}
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(self.input, self.pos)
}

line_col(self.input, self.pos)
}

/// Returns the entire line of the input that contains this `Position`.
Expand Down Expand Up @@ -455,6 +452,17 @@ impl<'i> Hash for Position<'i> {
}
}

/// Returns the `(line, col)` of byte offset `pos` in `input`, using the
/// `fast_line_col` implementation because the `fast-line-col` feature is on.
#[cfg(feature = "fast-line-col")]
pub(crate) fn line_col(input: &str, pos: usize) -> (usize, usize) {
    fast_line_col(input, pos)
}

/// Returns the `(line, col)` of byte offset `pos` in `input`, using the
/// `original_line_col` implementation because the `fast-line-col` feature is off.
#[cfg(not(feature = "fast-line-col"))]
pub(crate) fn line_col(input: &str, pos: usize) -> (usize, usize) {
    original_line_col(input, pos)
}

#[inline]
#[cfg(not(feature = "fast-line-col"))]
fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) {
Expand Down

0 comments on commit 79746cf

Please sign in to comment.