Skip to content

Commit

Permalink
Improve line/col calculation performance by moving a cursor on Pairs I…
Browse files Browse the repository at this point in the history
  • Loading branch information
huacnlee committed Dec 27, 2022
1 parent 024b857 commit 6434877
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 6 deletions.
2 changes: 1 addition & 1 deletion grammars/benches/json.rs
Expand Up @@ -65,7 +65,7 @@ fn line_col_benchmark(c: &mut Criterion) {
let last_pair = pairs.last().unwrap();
c.bench_function("line col", |b| {
b.iter(|| {
let _ = last_pair.as_span().start_pos().line_col();
last_pair.line_col();
});
});
}
Expand Down
33 changes: 31 additions & 2 deletions pest/src/iterators/flat_pairs.rs
Expand Up @@ -12,6 +12,7 @@ use alloc::vec::Vec;
use core::fmt;

use super::pair::{self, Pair};
use super::pairs::{Cursor, CursorPairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::RuleType;
Expand All @@ -28,6 +29,7 @@ pub struct FlatPairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
}

/// # Safety
Expand All @@ -44,6 +46,7 @@ pub unsafe fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
}
}

Expand Down Expand Up @@ -107,7 +110,15 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
return None;
}

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
self.start,
self.cursor.clone(),
)
};
self.move_cursor(pair.as_str());

self.next_start();

Expand All @@ -123,7 +134,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {

self.next_start_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
self.end,
self.cursor.clone(),
)
};

Some(pair)
}
Expand All @@ -144,10 +162,21 @@ impl<'i, R: Clone> Clone for FlatPairs<'i, R> {
input: self.input,
start: self.start,
end: self.end,
cursor: self.cursor.clone(),
}
}
}

impl<'i, R: RuleType> CursorPairs for FlatPairs<'i, R> {
fn cursor(&self) -> Cursor {
self.cursor.clone()
}

fn cursor_mut(&mut self) -> &mut Cursor {
&mut self.cursor
}
}

#[cfg(test)]
mod tests {
use super::super::super::macros::tests::*;
Expand Down
13 changes: 12 additions & 1 deletion pest/src/iterators/pair.rs
Expand Up @@ -20,7 +20,7 @@ use core::str;
#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::pairs::{self, Pairs};
use super::pairs::{self, Cursor, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
Expand All @@ -43,6 +43,9 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,

line: usize,
col: usize,
}

/// # Safety
Expand All @@ -52,11 +55,14 @@ pub unsafe fn new<R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
start: usize,
cursor: Cursor,
) -> Pair<'_, R> {
Pair {
queue,
input,
start,
line: cursor.line,
col: cursor.col,
}
}

Expand Down Expand Up @@ -241,6 +247,11 @@ impl<'i, R: RuleType> Pair<'i, R> {
::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
}

/// Returns the `line`, `col` of this pair start.
pub fn line_col(&self) -> (usize, usize) {
(self.line, self.col)
}

fn pair(&self) -> usize {
match self.queue[self.start] {
QueueableToken::Start {
Expand Down
121 changes: 119 additions & 2 deletions pest/src/iterators/pairs.rs
Expand Up @@ -25,6 +25,82 @@ use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::RuleType;

/// A `(line, col)` position carried by pair iterators and copied into each
/// pair they create.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Cursor {
    /// Line component of the position.
    pub line: usize,
    /// Column component of the position.
    pub col: usize,
}

impl Default for Cursor {
fn default() -> Cursor {
Cursor { line: 1, col: 1 }
}
}

impl Cursor {
fn get(&self) -> (usize, usize) {
(self.line, self.col)
}
}

/// Shared `(line, col)` bookkeeping for iterators that own a `Cursor`.
///
/// Implementors only provide access to their cursor; the provided methods
/// measure a piece of text and advance the cursor over it.
pub trait CursorPairs {
    /// Returns a copy of the implementor's current cursor.
    fn cursor(&self) -> Cursor;
    /// Returns a mutable reference to the implementor's cursor.
    fn cursor_mut(&mut self) -> &mut Cursor;

    /// Advances the cursor over `part` and returns the `(line, col)` the
    /// cursor held *before* the move.
    fn move_cursor(&mut self, part: &str) -> (usize, usize) {
        let (line_delta, col_value, crossed_newline) = self.line_col(part);
        let before = self.cursor().get();

        let cursor = self.cursor_mut();
        cursor.line += line_delta;
        // After a line break the measured column is absolute (it restarts at 1);
        // otherwise it is a count of chars to add to the current column.
        cursor.col = if crossed_newline {
            col_value
        } else {
            cursor.col + col_value
        };

        before
    }

    /// Measures `part` in isolation, returning
    /// `(line breaks seen, column value, whether a line break was seen)`.
    ///
    /// Every `'\n'` adds one line and resets the column to 1; every other char
    /// (a `'\r'` included) adds one column. A `'\r'` directly before `'\n'`
    /// therefore leaves no trace, because the `'\n'` resets the column.
    ///
    /// Forked from pest's position code so that only `part` is counted:
    /// <https://github.com/pest-parser/pest/blob/85b18aae23cc7b266c0b5252f9f74b7ab0000795/pest/src/position.rs#L135>
    fn line_col(&self, part: &str) -> (usize, usize, bool) {
        part.chars()
            .fold((0, 0, false), |(lines, col, seen_newline), ch| {
                if ch == '\n' {
                    (lines + 1, 1, true)
                } else {
                    (lines, col + 1, seen_newline)
                }
            })
    }
}

/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
Expand All @@ -36,6 +112,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
}

pub fn new<R: RuleType>(
Expand All @@ -49,6 +126,7 @@ pub fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
}
}

Expand Down Expand Up @@ -181,7 +259,14 @@ impl<'i, R: RuleType> Pairs<'i, R> {
#[inline]
pub fn peek(&self) -> Option<Pair<'i, R>> {
if self.start < self.end {
Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
Some(unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
self.start,
self.cursor.clone(),
)
})
} else {
None
}
Expand Down Expand Up @@ -221,11 +306,22 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}

impl<'i, R: RuleType> CursorPairs for Pairs<'i, R> {
fn cursor(&self) -> Cursor {
self.cursor.clone()
}

fn cursor_mut(&mut self) -> &mut Cursor {
&mut self.cursor
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;

fn next(&mut self) -> Option<Self::Item> {
let pair = self.peek()?;
self.move_cursor(pair.as_str());
self.start = self.pair() + 1;
Some(pair)
}
Expand All @@ -239,7 +335,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {

self.end = self.pair_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
self.end,
self.cursor.clone(),
)
};

Some(pair)
}
Expand Down Expand Up @@ -423,4 +526,18 @@ mod tests {
vec![Rule::c, Rule::a]
);
}

#[test]
fn test_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abcde\nabcde").unwrap();

    // The first pair is reported at the default cursor (1, 1); yielding it
    // advances the cursor by the three chars of "abc".
    let first = pairs.next().unwrap();
    assert_eq!(first.as_str(), "abc");
    assert_eq!(first.line_col(), (1, 1));
    assert_eq!(pairs.cursor.get(), (1, 4));

    // The second pair is reported at the cursor left behind by the first one,
    // and then advances it by its own single char.
    // NOTE(review): this pins the cursor value, not the position of "e" in the
    // input — presumably col 5 in "abcde"; confirm which is wanted.
    let second = pairs.next().unwrap();
    assert_eq!(second.as_str(), "e");
    assert_eq!(second.line_col(), (1, 4));
    assert_eq!(pairs.cursor.get(), (1, 5));
}
}
1 change: 1 addition & 0 deletions pest/src/lib.rs
Expand Up @@ -360,6 +360,7 @@ pub mod prec_climber;
mod span;
mod stack;
mod token;

#[doc(hidden)]
pub mod unicode;

Expand Down

0 comments on commit 6434877

Please sign in to comment.