Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: speed up Position::line_col for large inputs using SIMD #707

Merged
merged 1 commit into from Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions derive/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest_derive"
description = "pest's derive macro"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"]

[dependencies]
# for tests, included transitively anyway
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_generator = { path = "../generator", version = "2.3.0", default-features = false }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_generator = { path = "../generator", version = "2.3.1", default-features = false }
6 changes: 3 additions & 3 deletions generator/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest_generator"
description = "pest code generator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -18,8 +18,8 @@ default = ["std"]
std = ["pest/std"]

[dependencies]
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_meta = { path = "../meta", version = "2.3.1" }
proc-macro2 = "1.0"
quote = "1.0"
syn = "1.0"
6 changes: 3 additions & 3 deletions grammars/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest_grammars"
description = "pest popular grammar implementations"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,8 +14,8 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_derive = { path = "../derive", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_derive = { path = "../derive", version = "2.3.1" }

[dev-dependencies]
criterion = "0.3"
Expand Down
42 changes: 41 additions & 1 deletion grammars/benches/json.rs
Expand Up @@ -30,5 +30,45 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}

criterion_group!(benches, criterion_benchmark);
// Self-contained parser used by `line_col_benchmark`; kept in its own module so the generated
// `Rule` enum is reached as `autocorrect::Rule`.
mod autocorrect {
use pest_derive::Parser;

// The grammar is supplied inline through `grammar_inline` rather than from a separate file.
// `item` is the entry rule: the whole input (`SOI` to `EOI`) as a sequence of `line`s. Each
// `line` is one of: a `"key": "string"` pair, a `//` or `/* */` comment, a run of spaces,
// `other` (any single character that does not begin a pair), or a newline (`\n` or `\r`).
// Strings may not contain a newline or a double quote (no escape handling in this grammar).
#[derive(Parser)]
#[grammar_inline = r#"
newline = ${ "\n" | "\r" }
space = ${ " "+ }

other = ${ !(pair) ~ ANY }
comment = ${ single_line_comment | multiline_comment }
single_line_comment = _{ "//" ~ (!(newline) ~ ANY)* }
multiline_comment = _{ "/*" ~ (!("*/") ~ ANY)* ~ "*/"}

string_type = _{
("\"" ~ (!(newline | "\"") ~ ANY)* ~ "\"")
}
key = ${ string_type ~ (" ")* ~ ":" ~ (" ")* }
string = ${ string_type }
pair = _{ key ~ string }

line = _{ pair | comment | space | other | newline }
item = _{ SOI ~ line* ~ EOI }
"#]
pub struct JsonParser;
}

/// Benchmarks `Position::line_col` on a large input.
///
/// Reads `benches/main.i18n.json`, parses it once with `autocorrect::JsonParser`, and then
/// repeatedly computes the line/column of the start position of the last pair the parse
/// produced. Parsing happens outside the timed closure, so only `line_col` is measured.
///
/// # Panics
///
/// Panics if the fixture file cannot be opened or read, if it fails to parse, or if the
/// parse yields no pairs.
fn line_col_benchmark(c: &mut Criterion) {
    let mut file = File::open("benches/main.i18n.json").unwrap();
    let mut data = String::new();

    file.read_to_string(&mut data).unwrap();
    let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();
    let last_pair = pairs.last().unwrap();
    c.bench_function("line col", |b| {
        // Return the result instead of discarding it: criterion passes the closure's return
        // value through `black_box`, which keeps the optimizer from eliding the computation.
        b.iter(|| last_pair.as_span().start_pos().line_col());
    });
}

criterion_group!(benches, criterion_benchmark, line_col_benchmark,);
criterion_main!(benches);
10,128 changes: 10,128 additions & 0 deletions grammars/benches/main.i18n.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions meta/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest_meta"
description = "pest meta language parser and validator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
once_cell = "1.8.0"

[build-dependencies]
Expand Down
7 changes: 6 additions & 1 deletion pest/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest"
description = "The Elegant Parser"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -21,9 +21,14 @@ std = ["ucd-trie/std", "thiserror"]
pretty-print = ["serde", "serde_json"]
# Enable const fn constructor for `PrecClimber`
const_prec_climber = []
# Enable faster `Position::line_col` calculation using SIMD
# (note that this may have extra overhead for small inputs)
fast-line-col = ["memchr", "bytecount"]

[dependencies]
ucd-trie = { version = "0.1.1", default-features = false }
serde = { version = "1.0.89", optional = true }
serde_json = { version = "1.0.39", optional = true}
thiserror = { version = "1.0.31", optional = true }
memchr = { version = "2", optional = true }
bytecount = { version = "0.6", optional = true }
104 changes: 66 additions & 38 deletions pest/src/position.rs
Expand Up @@ -135,45 +135,14 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}

let mut pos = self.pos;
// Position's pos is always a UTF-8 border.
let slice = &self.input[..pos];
let mut chars = slice.chars().peekable();

let mut line_col = (1, 1);

while pos != 0 {
match chars.next() {
Some('\r') => {
if let Some(&'\n') = chars.peek() {
chars.next();

if pos == 1 {
pos -= 1;
} else {
pos -= 2;
}

line_col = (line_col.0 + 1, 1);
} else {
pos -= 1;
line_col = (line_col.0, line_col.1 + 1);
}
}
Some('\n') => {
pos -= 1;
line_col = (line_col.0 + 1, 1);
}
Some(c) => {
pos -= c.len_utf8();
line_col = (line_col.0, line_col.1 + 1);
}
None => unreachable!(),
}
#[cfg(feature = "fast-line-col")]
{
fast_line_col(self.input, self.pos)
}
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(self.input, self.pos)
}

line_col
}

/// Returns the entire line of the input that contains this `Position`.
Expand Down Expand Up @@ -432,6 +401,63 @@ impl<'i> Hash for Position<'i> {
}
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input` by walking every
/// character that precedes it.
///
/// A `\n`, or a `\r` immediately followed by `\n` within the scanned prefix, starts a new line
/// and resets the column to 1. Every other character, including a lone `\r`, advances the
/// column by one. Columns are counted in `char`s, not bytes.
///
/// # Panics
///
/// Panics if `pos` is past the end of `input` or is not on a UTF-8 character boundary.
#[inline]
#[cfg(not(feature = "fast-line-col"))]
fn original_line_col(input: &str, pos: usize) -> (usize, usize) {
    // Position's pos is always a UTF-8 border.
    let mut remaining = input[..pos].chars().peekable();

    let mut line = 1;
    let mut column = 1;

    while let Some(current) = remaining.next() {
        let ends_line = match current {
            '\n' => true,
            // A carriage return only ends a line as part of a CRLF pair; consume the LF too
            // so that the pair is counted as a single line break.
            '\r' if remaining.peek() == Some(&'\n') => {
                remaining.next();
                true
            }
            _ => false,
        };

        if ends_line {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }

    (line, column)
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input` using byte searches
/// from the `memchr` and `bytecount` crates instead of a character-by-character walk.
///
/// The line number is one more than the number of `\n` bytes before `pos`. The column is one
/// more than the number of `char`s between the last preceding `\n` and `pos` (or between the
/// start of `input` and `pos` when there is no preceding `\n`).
///
/// # Panics
///
/// Panics if `pos` is past the end of `input` or is not on a UTF-8 character boundary.
#[inline]
#[cfg(feature = "fast-line-col")]
fn fast_line_col(input: &str, pos: usize) -> (usize, usize) {
    // Position's pos is always a UTF-8 border.
    let prefix = &input[..pos];

    match memchr::memrchr(b'\n', prefix.as_bytes()) {
        // No line break before `pos`: still on the first line.
        None => (1, prefix.chars().count() + 1),
        Some(last_newline) => {
            let line = bytecount::count(prefix[..=last_newline].as_bytes(), b'\n') + 1;
            // The counted tail starts at the `\n` itself, which supplies the `+ 1` that makes
            // the column 1-based.
            let column = prefix[last_newline..].chars().count();
            (line, column)
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -465,6 +491,8 @@ mod tests {
assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
let input = "abcd嗨";
assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6));
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions vm/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "pest_vm"
description = "pest grammar virtual machine"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,5 +14,5 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_meta = { path = "../meta", version = "2.3.1" }