Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve line/col calculation performance by moving a cursor in the Pairs iterator. #754

Merged
merged 5 commits into from Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 24 additions & 3 deletions grammars/benches/json.rs
Expand Up @@ -56,16 +56,37 @@ item = _{ SOI ~ line* ~ EOI }
pub struct JsonParser;
}

// With 500 iterations
// pair.line_col time: [2.9937 µs 2.9975 µs 3.0018 µs]
// position.line_col time: [212.59 µs 213.38 µs 214.29 µs]
// position.line_col (with fast-line-col) time: [18.241 µs 18.382 µs 18.655 µs]
//
// With 1000 iterations
// pair.line_col time: [10.814 µs 10.846 µs 10.893 µs]
// position.line_col time: [90.135 µs 93.901 µs 98.655 µs]
// position.line_col (with fast-line-col) time: [1.7199 ms 1.7246 ms 1.7315 ms]
fn line_col_benchmark(c: &mut Criterion) {
let mut file = File::open("benches/main.i18n.json").unwrap();
let mut data = String::new();

file.read_to_string(&mut data).unwrap();
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();
let last_pair = pairs.last().unwrap();
c.bench_function("line col", |b| {

c.bench_function("pair.line_col", |b| {
b.iter(|| {
let mut pairs = pairs.clone();
for _ in 0..500 {
pairs.next().unwrap().line_col();
}
})
});

c.bench_function("position.line_col", |b| {
b.iter(|| {
let _ = last_pair.as_span().start_pos().line_col();
let mut pairs = pairs.clone();
for _ in 0..500 {
pairs.next().unwrap().as_span().start_pos().line_col();
}
});
});
}
Expand Down
21 changes: 20 additions & 1 deletion pest/src/iterators/flat_pairs.rs
Expand Up @@ -108,7 +108,6 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
}

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };

self.next_start();

Some(pair)
Expand Down Expand Up @@ -177,4 +176,24 @@ mod tests {
vec![Rule::c, Rule::b, Rule::a]
);
}

#[test]
fn test_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abcNe\nabcde").unwrap().flatten();

    // (matched text, expected (line, col) of the pair start), in iteration order.
    let expected: [(&str, (usize, usize)); 3] = [("abc", (1, 1)), ("b", (1, 2)), ("e", (1, 5))];

    for &(text, line_col) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), line_col);
        // The pair's own answer must agree with the one derived from its span.
        assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col());
    }
}
}
10 changes: 10 additions & 0 deletions pest/src/iterators/pair.rs
Expand Up @@ -43,6 +43,7 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,
pub(crate) line_col: Option<(usize, usize)>,
}

/// # Safety
Expand All @@ -57,6 +58,7 @@ pub unsafe fn new<R: RuleType>(
queue,
input,
start,
line_col: None,
}
}

Expand Down Expand Up @@ -241,6 +243,14 @@ impl<'i, R: RuleType> Pair<'i, R> {
::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
}

/// Returns the `(line, col)` at which this pair starts.
///
/// When a value has been stored in the pair's `line_col` field it is returned
/// as-is; otherwise the result is computed from the start position of the
/// pair's span.
pub fn line_col(&self) -> (usize, usize) {
    self.line_col
        .unwrap_or_else(|| self.as_span().start_pos().line_col())
}

fn pair(&self) -> usize {
match self.queue[self.start] {
QueueableToken::Start {
Expand Down
103 changes: 101 additions & 2 deletions pest/src/iterators/pairs.rs
Expand Up @@ -23,8 +23,24 @@ use super::flat_pairs::{self, FlatPairs};
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::RuleType;
use crate::{position, RuleType};

/// A (line, col) position paired with the input offset it corresponds to.
#[derive(Clone)]
pub struct Cursor {
    /// Line number; the default cursor starts at line 1.
    pub line: usize,
    /// Column number; the default cursor starts at column 1.
    pub col: usize,
    /// Offset into the input up to which the cursor has advanced.
    pub end: usize,
}

impl Default for Cursor {
    /// Creates a cursor at line 1, column 1, offset 0.
    fn default() -> Self {
        Self {
            end: 0,
            line: 1,
            col: 1,
        }
    }
}
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
Expand All @@ -36,6 +52,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
}

pub fn new<R: RuleType>(
Expand All @@ -49,6 +66,7 @@ pub fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
}
}

Expand Down Expand Up @@ -219,13 +237,50 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
}

/// Move the cursor (line, col) past `input[start..end]`.
///
/// If the cursor has not yet reached `start` (for example because some
/// characters were skipped between two pairs), it is first advanced over
/// `input[cursor.end..start]`.
///
/// Returns the cursor's `(line, col)` as it was right before consuming
/// `input[start..end]`, i.e. the position at which that part begins.
///
/// # Panics
///
/// Panics if `end` is smaller than the offset the cursor has already reached,
/// or if the resulting range is not a valid slice of `input`.
fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) {
    // Catch up over characters skipped since the previous call (e.g. by `skip(n)`).
    let prev_end = self.cursor.end;
    if prev_end != start {
        self.move_cursor(input, prev_end, start);
    }

    let (prev_line, prev_col) = (self.cursor.line, self.cursor.col);

    let part = &input[self.cursor.end..end];
    // `position::line_col` is treated as 1-based here: for the end of `part`,
    // `l - 1` is the number of line breaks in `part` and `c - 1` the number of
    // characters after the last line break.
    let (l, c) = position::line_col(part, part.len());
    let line_breaks = l - 1;
    let chars_on_last_line = c - 1;

    self.cursor.line += line_breaks;
    if line_breaks > 0 {
        // A new line was started: columns restart at 1 and the characters that
        // follow the last line break come on top of that.
        self.cursor.col = chars_on_last_line + 1;
    } else {
        // Same line: advance by exactly the consumed characters. An empty part
        // must leave the column untouched.
        self.cursor.col += chars_on_last_line;
    }
    self.cursor.end = end;

    (prev_line, prev_col)
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;

/// Returns the next pair, tagged with the `(line, col)` at which it starts,
/// or `None` when `peek` yields nothing.
fn next(&mut self) -> Option<Self::Item> {
    // Peek exactly once; the pair is mutated below to carry its position.
    let mut pair = self.peek()?;
    let span = pair.as_span();

    // `move_cursor` hands back the cursor position from before it consumed the
    // span, which is where this pair begins.
    let (l, c) = self.move_cursor(self.input, span.start(), span.end());
    pair.line_col = Some((l, c));

    self.start = self.pair() + 1;
    Some(pair)
}
Expand Down Expand Up @@ -423,4 +478,48 @@ mod tests {
vec![Rule::c, Rule::a]
);
}

#[test]
fn test_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();

    // (matched text, pair start (line, col), cursor (line, col, end) after the pair)
    let expected: [(&str, (usize, usize), (usize, usize, usize)); 3] = [
        ("abc", (1, 1), (1, 4, 3)),
        ("e", (2, 1), (2, 2, 5)),
        ("fgh", (2, 2), (2, 5, 8)),
    ];

    for &(text, start, cursor_after) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), start);
        assert_eq!(
            (pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
            cursor_after
        );
    }
}

#[test]
fn test_rev_iter_line_col() {
    let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();

    // Pairs come out back to front; each must still report its own start position.
    let expected: [(&str, (usize, usize)); 3] = [("fgh", (2, 2)), ("e", (2, 1)), ("abc", (1, 1))];

    for &(text, line_col) in expected.iter() {
        let pair = pairs.next().unwrap();
        assert_eq!(pair.as_str(), text);
        assert_eq!(pair.line_col(), line_col);
    }
}
}
1 change: 1 addition & 0 deletions pest/src/lib.rs
Expand Up @@ -360,6 +360,7 @@ pub mod prec_climber;
mod span;
mod stack;
mod token;

#[doc(hidden)]
pub mod unicode;

Expand Down
2 changes: 2 additions & 0 deletions pest/src/macros.rs
Expand Up @@ -329,6 +329,7 @@ pub mod tests {
a,
b,
c,
d,
}

pub struct AbcParser;
Expand All @@ -345,6 +346,7 @@ pub mod tests {
.skip(1)
})
.and_then(|s| s.skip(1).unwrap().rule(Rule::c, |s| s.match_string("e")))
.and_then(|s| s.optional(|s| s.rule(Rule::d, |s| s.match_string("fgh"))))
})
}
}
Expand Down
24 changes: 16 additions & 8 deletions pest/src/position.rs
Expand Up @@ -116,6 +116,9 @@ impl<'i> Position<'i> {

/// Returns the line and column number of this `Position`.
///
/// This is an O(n) operation, where n is the number of chars in the input.
/// Prefer [`pair.line_col()`](struct.Pair.html#method.line_col) instead.
///
/// # Examples
///
/// ```
Expand All @@ -135,14 +138,8 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}
#[cfg(feature = "fast-line-col")]
{
fast_line_col(self.input, self.pos)
}
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(self.input, self.pos)
}

line_col(self.input, self.pos)
}

/// Returns the entire line of the input that contains this `Position`.
Expand Down Expand Up @@ -455,6 +452,17 @@ impl<'i> Hash for Position<'i> {
}
}

/// Computes the `(line, col)` for offset `pos` within `input`.
///
/// This is a compile-time dispatch: with the `fast-line-col` feature enabled
/// it forwards to `fast_line_col`, otherwise to `original_line_col`. Exactly
/// one of the two branches is compiled in.
pub(crate) fn line_col(input: &str, pos: usize) -> (usize, usize) {
// Only present when the `fast-line-col` feature is on.
#[cfg(feature = "fast-line-col")]
{
fast_line_col(input, pos)
}
// Fallback used when the `fast-line-col` feature is off.
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(input, pos)
}
}

#[inline]
#[cfg(not(feature = "fast-line-col"))]
fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) {
Expand Down