Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve line/col calculation performance by moving a cursor on the Pairs iterator. #754

Merged
merged 5 commits into from Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 24 additions & 3 deletions grammars/benches/json.rs
Expand Up @@ -56,16 +56,37 @@ item = _{ SOI ~ line* ~ EOI }
pub struct JsonParser;
}

// With 500 iterations
// pair.line_col time: [2.9937 µs 2.9975 µs 3.0018 µs]
// position.line_col time: [212.59 µs 213.38 µs 214.29 µs]
// position.line_col (with fast-line-col) time: [18.241 µs 18.382 µs 18.655 µs]
//
// With 1000 iterations
// pair.line_col time: [10.814 µs 10.846 µs 10.893 µs]
// position.line_col time: [90.135 µs 93.901 µs 98.655 µs]
// position.line_col (with fast-line-col) time: [1.7199 ms 1.7246 ms 1.7315 ms]
/// Benchmarks the pair-level `line_col()` against the span-position `line_col()`.
///
/// Reads `benches/main.i18n.json`, parses it with `autocorrect::JsonParser`
/// (rule `item`), and registers Criterion benchmarks that each clone the parsed
/// pairs and query the line/column of the first 500 pairs.
// NOTE(review): this span comes from a rendered diff, so the `"line col"`
// `bench_function` opener and the `last_pair` lines look like the removed side
// of the change interleaved with the added side — confirm against the real file.
fn line_col_benchmark(c: &mut Criterion) {
// Load the whole fixture into memory; a missing or unreadable file panics.
let mut file = File::open("benches/main.i18n.json").unwrap();
let mut data = String::new();

file.read_to_string(&mut data).unwrap();
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();
let last_pair = pairs.last().unwrap();
c.bench_function("line col", |b| {

c.bench_function("pair.line_col", |b| {
b.iter(|| {
// Clone so that every measured iteration starts from the first pair.
let mut pairs = pairs.clone();
for _ in 0..500 {
pairs.next().unwrap().line_col();
}
})
});

c.bench_function("position.line_col", |b| {
b.iter(|| {
let _ = last_pair.as_span().start_pos().line_col();
let mut pairs = pairs.clone();
for _ in 0..500 {
// Same walk, but the position is computed from the span start each time.
pairs.next().unwrap().as_span().start_pos().line_col();
}
});
});
}
Expand Down
25 changes: 22 additions & 3 deletions pest/src/iterators/flat_pairs.rs
Expand Up @@ -107,8 +107,7 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
return None;
}

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start, None) };
self.next_start();

Some(pair)
Expand All @@ -123,7 +122,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {

self.next_start_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end, None) };

Some(pair)
}
Expand Down Expand Up @@ -177,4 +176,24 @@ mod tests {
vec![Rule::c, Rule::b, Rule::a]
);
}

#[test]
fn test_line_col() {
    // Expected text and (line, col) of the first three flattened pairs, in order.
    let expected = [("abc", (1, 1)), ("b", (1, 2)), ("e", (1, 5))];
    let mut flat = AbcParser::parse(Rule::a, "abcNe\nabcde").unwrap().flatten();

    for (text, position) in expected {
        let current = flat.next().unwrap();
        assert_eq!(current.as_str(), text);
        assert_eq!(current.line_col(), position);
        // The pair's own answer must agree with the span-based computation.
        assert_eq!(current.line_col(), current.as_span().start_pos().line_col());
    }
}
}
13 changes: 12 additions & 1 deletion pest/src/iterators/pair.rs
Expand Up @@ -20,7 +20,7 @@ use core::str;
#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::pairs::{self, Pairs};
use super::pairs::{self, Cursor, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
Expand All @@ -43,6 +43,7 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,
cursor: Option<Cursor>,
}

/// # Safety
Expand All @@ -52,11 +53,13 @@ pub unsafe fn new<R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
start: usize,
cursor: Option<Cursor>,
) -> Pair<'_, R> {
Pair {
queue,
input,
start,
cursor,
}
}

Expand Down Expand Up @@ -241,6 +244,14 @@ impl<'i, R: RuleType> Pair<'i, R> {
::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
}

/// Returns the `(line, col)` at which this pair starts.
///
/// When the pair carries a cursor, that stored position is returned directly;
/// otherwise the position is computed from the start of the pair's span.
pub fn line_col(&self) -> (usize, usize) {
    if let Some(cursor) = &self.cursor {
        return (cursor.line, cursor.col);
    }

    self.as_span().start_pos().line_col()
}

fn pair(&self) -> usize {
match self.queue[self.start] {
QueueableToken::Start {
Expand Down
114 changes: 112 additions & 2 deletions pest/src/iterators/pairs.rs
Expand Up @@ -25,6 +25,82 @@ use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::RuleType;

/// A `(line, col)` position that is tracked while iterating over pairs.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Cursor {
    /// Line number of the position.
    pub line: usize,
    /// Column number of the position.
    pub col: usize,
}

impl Default for Cursor {
fn default() -> Cursor {
Cursor { line: 1, col: 1 }
}
}

impl Cursor {
    /// Returns the current position as a `(line, col)` tuple.
    pub(crate) fn get(&self) -> (usize, usize) {
        let Self { line, col } = self;
        (*line, *col)
    }
}

/// Tracks a `(line, col)` [`Cursor`] that is advanced over consumed input text.
pub trait CursorPairs {
    /// Returns the current cursor by value.
    fn cursor(&self) -> Cursor;

    /// Returns the cursor for in-place updates.
    fn cursor_mut(&mut self) -> &mut Cursor;

    /// Advances the cursor over `part` and returns the `(line, col)` it had
    /// *before* the move.
    ///
    /// The line is increased by the number of line breaks in `part`. If `part`
    /// contains a line break the column is replaced by the column reached after
    /// the last break; otherwise the column is increased by the number of
    /// chars in `part`.
    fn move_cursor(&mut self, part: &str) -> (usize, usize) {
        let (lines, cols, has_new_line) = self.line_col(part);
        let previous = self.cursor().get();

        let cursor = self.cursor_mut();
        cursor.line += lines;
        if has_new_line {
            // A line break restarts column counting, so `cols` is absolute.
            cursor.col = cols;
        } else {
            cursor.col += cols;
        }

        previous
    }

    /// Measures `part` and returns `(lines, col, has_new_line)`.
    ///
    /// * `lines` is the number of line breaks in `part`; `\n` and `\r\n` each
    ///   count as one break, while a `\r` not followed by `\n` is an ordinary
    ///   char.
    /// * `col` is the number of chars in `part` when it has no line break;
    ///   otherwise it is 1 plus the number of chars after the last break.
    /// * `has_new_line` tells whether at least one line break was seen.
    ///
    /// Adapted from pest's `Position::line_col`, counting only `part`:
    /// https://github.com/pest-parser/pest/blob/85b18aae23cc7b266c0b5252f9f74b7ab0000795/pest/src/position.rs#L135
    fn line_col(&self, part: &str) -> (usize, usize, bool) {
        let mut chars = part.chars().peekable();

        let mut lines = 0;
        let mut col = 0;
        let mut has_new_line = false;

        while let Some(c) = chars.next() {
            let is_line_break = match c {
                '\n' => true,
                '\r' if chars.peek() == Some(&'\n') => {
                    // Swallow the `\n` so that `\r\n` counts as a single break.
                    chars.next();
                    true
                }
                _ => false,
            };

            if is_line_break {
                lines += 1;
                col = 1;
                has_new_line = true;
            } else {
                col += 1;
            }
        }

        (lines, col, has_new_line)
    }
}

/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
Expand All @@ -36,6 +112,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
}

pub fn new<R: RuleType>(
Expand All @@ -49,6 +126,7 @@ pub fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
}
}

Expand Down Expand Up @@ -181,7 +259,14 @@ impl<'i, R: RuleType> Pairs<'i, R> {
#[inline]
pub fn peek(&self) -> Option<Pair<'i, R>> {
if self.start < self.end {
Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
Some(unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
self.start,
Some(self.cursor.clone()),
)
})
} else {
None
}
Expand Down Expand Up @@ -221,11 +306,22 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}

impl<'i, R: RuleType> CursorPairs for Pairs<'i, R> {
fn cursor(&self) -> Cursor {
self.cursor.clone()
}

fn cursor_mut(&mut self) -> &mut Cursor {
&mut self.cursor
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;

/// Yields the pair at the current start index, or `None` when `peek` finds none.
///
/// The yielded pair keeps the cursor value from before the move; afterwards the
/// iterator skips past the pair's tokens and advances its cursor over the
/// pair's text.
// NOTE(review): the cursor only advances over `pair.as_str()`; input that lies
// between two yielded pairs does not seem to be counted — confirm this is intended.
fn next(&mut self) -> Option<Self::Item> {
    let current = self.peek()?;
    self.start = self.pair() + 1;
    self.move_cursor(current.as_str());
    Some(current)
}
Expand All @@ -239,7 +335,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {

self.end = self.pair_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end, None) };

Some(pair)
}
Expand Down Expand Up @@ -423,4 +519,18 @@ mod tests {
vec![Rule::c, Rule::a]
);
}

#[test]
fn test_line_col() {
    // (pair text, position reported by the pair, iterator cursor after yielding it)
    let steps = [("abc", (1, 1), (1, 4)), ("e", (1, 4), (1, 5))];
    let mut pairs = AbcParser::parse(Rule::a, "abcde\nabcde").unwrap();

    for (text, pair_position, cursor_position) in steps {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), pair_position);
        assert_eq!(pairs.cursor.get(), cursor_position);
    }
}
}
1 change: 1 addition & 0 deletions pest/src/lib.rs
Expand Up @@ -360,6 +360,7 @@ pub mod prec_climber;
mod span;
mod stack;
mod token;

#[doc(hidden)]
pub mod unicode;

Expand Down
4 changes: 4 additions & 0 deletions pest/src/position.rs
Expand Up @@ -116,6 +116,10 @@ impl<'i> Position<'i> {

/// Returns the line and column number of this `Position`.
///
/// This is an O(n) operation, where n is the number of chars in the input.
/// Prefer using `pair.line_col()` instead.
huacnlee marked this conversation as resolved.
Show resolved Hide resolved
///
///
/// # Examples
///
/// ```
Expand Down