From 1e407663b4aeef32e481643d0c45d834799af404 Mon Sep 17 00:00:00 2001 From: Tomas Tauber <2410580+tomtau@users.noreply.github.com> Date: Fri, 2 Dec 2022 19:59:03 +0800 Subject: [PATCH] perf: add a faster skip_until using SIMD memchr (#745) it was put under the "memchr" feature-flag, as memchr SIMD support is only on x86_64 at the moment (json bench is ~8% faster over baseline there): https://github.com/pest-parser/pest/pull/737#issuecomment-1325784358 once memchr has the Arm SIMD support, this could be perhaps the default implementation. For Aho-Corasick, it'll require more investigation. Co-authored-by: Tomas Tauber --- debugger/Cargo.toml | 8 +++---- derive/Cargo.toml | 6 ++--- generator/Cargo.toml | 6 ++--- grammars/Cargo.toml | 6 ++--- meta/Cargo.toml | 4 ++-- pest/Cargo.toml | 2 +- pest/src/position.rs | 54 ++++++++++++++++++++++++++++++++++++++++++++ vm/Cargo.toml | 6 ++--- 8 files changed, 73 insertions(+), 19 deletions(-) diff --git a/debugger/Cargo.toml b/debugger/Cargo.toml index 4842d540..c2938db5 100644 --- a/debugger/Cargo.toml +++ b/debugger/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_debugger" description = "pest grammar debugger" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice ", "Tomas Tauber "] homepage = "https://pest.rs/" @@ -14,9 +14,9 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.5.0" } -pest_meta = { path = "../meta", version = "2.5.0" } -pest_vm = { path = "../vm", version = "2.5.0" } +pest = { path = "../pest", version = "2.5.1" } +pest_meta = { path = "../meta", version = "2.5.1" } +pest_vm = { path = "../vm", version = "2.5.1" } rustyline = "10" thiserror = "1" diff --git a/derive/Cargo.toml b/derive/Cargo.toml index addb3c86..2dd43a8b 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_derive" description = "pest's derive macro" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = 
["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"] [dependencies] # for tests, included transitively anyway -pest = { path = "../pest", version = "2.5.0", default-features = false } -pest_generator = { path = "../generator", version = "2.5.0", default-features = false } +pest = { path = "../pest", version = "2.5.1", default-features = false } +pest_generator = { path = "../generator", version = "2.5.1", default-features = false } diff --git a/generator/Cargo.toml b/generator/Cargo.toml index 621fb685..1836fe06 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -18,8 +18,8 @@ default = ["std"] std = ["pest/std"] [dependencies] -pest = { path = "../pest", version = "2.5.0", default-features = false } -pest_meta = { path = "../meta", version = "2.5.0" } +pest = { path = "../pest", version = "2.5.1", default-features = false } +pest_meta = { path = "../meta", version = "2.5.1" } proc-macro2 = "1.0" quote = "1.0" syn = "1.0" diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index 807b34bc..1a293e81 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_grammars" description = "pest popular grammar implementations" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.5.0" } -pest_derive = { path = "../derive", version = "2.5.0" } +pest = { path = "../pest", version = "2.5.1" } +pest_derive = { path = "../derive", version = "2.5.1" } [dev-dependencies] criterion = "0.3" diff --git a/meta/Cargo.toml b/meta/Cargo.toml index b400f53c..c1f004de 100644 --- a/meta/Cargo.toml +++ 
b/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_meta" description = "pest meta language parser and validator" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.5.0" } +pest = { path = "../pest", version = "2.5.1" } once_cell = "1.8.0" [build-dependencies] diff --git a/pest/Cargo.toml b/pest/Cargo.toml index 957db39f..e6d18b06 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest" description = "The Elegant Parser" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" diff --git a/pest/src/position.rs b/pest/src/position.rs index 6de1d76f..f91f8291 100644 --- a/pest/src/position.rs +++ b/pest/src/position.rs @@ -266,6 +266,60 @@ impl<'i> Position<'i> { /// this function will return `false` but its `pos` will *still* be updated. 
#[inline]
     pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
+        #[cfg(not(feature = "memchr"))]
+        {
+            self.skip_until_basic(strings)
+        }
+        #[cfg(feature = "memchr")]
+        {
+            match strings {
+                [] => (),
+                [s1] => {
+                    if let Some(from) =
+                        memchr::memmem::find(&self.input.as_bytes()[self.pos..], s1.as_bytes())
+                    {
+                        self.pos += from;
+                        return true;
+                    }
+                }
+                [s1, s2] if !s1.is_empty() && !s2.is_empty() => {
+                    let b1 = s1.as_bytes()[0];
+                    let b2 = s2.as_bytes()[0];
+                    let miter = memchr::memchr2_iter(b1, b2, &self.input.as_bytes()[self.pos..]);
+                    for from in miter {
+                        let start = &self.input[self.pos + from..];
+                        if start.starts_with(s1) || start.starts_with(s2) {
+                            self.pos += from;
+                            return true;
+                        }
+                    }
+                }
+                [s1, s2, s3] if !s1.is_empty() && !s2.is_empty() && !s3.is_empty() => {
+                    let b1 = s1.as_bytes()[0]; // guard above makes every `[0]` index in-bounds
+                    let b2 = s2.as_bytes()[0];
+                    let b3 = s3.as_bytes()[0]; // s3's own byte, else s3 is never probed
+                    let miter =
+                        memchr::memchr3_iter(b1, b2, b3, &self.input.as_bytes()[self.pos..]);
+                    for from in miter {
+                        let start = &self.input[self.pos + from..];
+                        if start.starts_with(s1) || start.starts_with(s2) || start.starts_with(s3) {
+                            self.pos += from;
+                            return true;
+                        }
+                    }
+                }
+                _ => {
+                    return self.skip_until_basic(strings);
+                }
+            }
+            self.pos = self.input.len();
+            false
+        }
+    }
+
+    #[inline]
+    fn skip_until_basic(&mut self, strings: &[&str]) -> bool {
+        // TODO: optimize with Aho-Corasick, e.g. https://crates.io/crates/daachorse?
         for from in self.pos..self.input.len() {
             let bytes = if let Some(string) = self.input.get(from..)
{ string.as_bytes() diff --git a/vm/Cargo.toml b/vm/Cargo.toml index da2b9d26..921e5c6e 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_vm" description = "pest grammar virtual machine" -version = "2.5.0" +version = "2.5.1" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,5 +14,5 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.5.0" } -pest_meta = { path = "../meta", version = "2.5.0" } +pest = { path = "../pest", version = "2.5.1" } +pest_meta = { path = "../meta", version = "2.5.1" }