From 2b65380c563312389644b36c8db3b930ce466e6a Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sat, 14 Oct 2023 15:47:07 -0400 Subject: [PATCH] Add the "teddy" algorithm from aho-corasick Per suggestion from @BurntSushi [here](https://github.com/tafia/quick-xml/pull/664#issuecomment-1763371610) On my M1, it appears to be slower but competitive with memchr up to memchr3, then starts being the fastest from 5-16 --- Cargo.toml | 1 + benches/benchmarks.rs | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 064f8c6..1bccce0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ benchmarks = [] pattern = [] [dev-dependencies] +aho-corasick = "1.1.0" proptest = "1.0.0" lazy_static = "1.0.0" region = "3.0.0" diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 1f2124d..4e9a3ef 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -40,6 +40,10 @@ fn spaces(c: &mut Criterion) { group.bench_function("stdlib_iter_position", |b| { b.iter(|| haystack.bytes().position(|c| c == b' ')); }); + group.bench_function("teddy", |b| { + let searcher = aho_corasick::packed::Searcher::new([" "]).unwrap(); + b.iter(|| searcher.find(&haystack).map(|m| m.start())); + }); group.bench_function("memchr", |b| { b.iter(|| memchr::memchr(b' ', haystack.as_bytes())); }); @@ -69,6 +73,10 @@ fn xml3(c: &mut Criterion) { .position(|c| c == b'<' || c == b'>' || c == b'&') }); }); + group.bench_function("teddy", |b| { + let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&"]).unwrap(); + b.iter(|| searcher.find(&haystack).map(|m| m.start())); + }); group.bench_function("memchr", |b| { b.iter(|| memchr::memchr3(b'<', b'>', b'&', haystack.as_bytes())); }); @@ -98,6 +106,10 @@ fn xml5(c: &mut Criterion) { .position(|c| c == b'<' || c == b'>' || c == b'&' || c == b'\'' || c == b'"') }); }); + group.bench_function("teddy", |b| { + let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&", "'", "\""]).unwrap(); + 
b.iter(|| searcher.find(&haystack).map(|m| m.start())); + }); group.bench_function("memchr", |b| { b.iter(|| { let bytes = haystack.as_bytes(); @@ -174,6 +186,10 @@ fn big_16(c: &mut Criterion) { }) }); }); + group.bench_function("teddy", |b| { + let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap(); + b.iter(|| searcher.find(&haystack).map(|m| m.start())); + }); group.bench_function("memchr", |b| { b.iter(|| { let bytes = haystack.as_bytes(); @@ -253,6 +269,10 @@ fn big_16(c: &mut Criterion) { }) }); }); + group.bench_function("teddy", |b| { + let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap(); + b.iter(|| searcher.find(&haystack).map(|m| m.start())); + }); group.bench_function("memchr", |b| { b.iter(|| { let bytes = haystack.as_bytes();