Skip to content

Commit

Permalink
Merge pull request #113 from indutny/fix/benchmarks
Browse files (browse the repository at this point in the history)
use criterion.rs for word benchmarks
  • Loading branch information
Manishearth committed Jan 30, 2023
2 parents 07e6155 + b59b1ce commit 243af2c
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 58 deletions.
55 changes: 26 additions & 29 deletions benches/unicode_words.rs
@@ -1,55 +1,52 @@
#[macro_use]
extern crate bencher;
extern crate unicode_segmentation;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use bencher::Bencher;
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

fn unicode_words(bench: &mut Bencher, path: &str) {
fn unicode_words(c: &mut Criterion, lang: &str, path: &str) {
let text = fs::read_to_string(path).unwrap();
bench.iter(|| {
for w in text.unicode_words() {
bencher::black_box(w);
}
c.bench_function(&format!("unicode_words_{}", lang), |bench| {
bench.iter(|| {
for w in text.unicode_words() {
black_box(w);
}
})
});

bench.bytes = text.len() as u64;
}

fn unicode_words_arabic(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/arabic.txt");
fn unicode_words_arabic(c: &mut Criterion) {
unicode_words(c, "arabic", "benches/texts/arabic.txt");
}

fn unicode_words_english(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/english.txt");
fn unicode_words_english(c: &mut Criterion) {
unicode_words(c, "english", "benches/texts/english.txt");
}

fn unicode_words_hindi(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/hindi.txt");
fn unicode_words_hindi(c: &mut Criterion) {
unicode_words(c, "hindi", "benches/texts/hindi.txt");
}

fn unicode_words_japanese(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/japanese.txt");
fn unicode_words_japanese(c: &mut Criterion) {
unicode_words(c, "japanese", "benches/texts/japanese.txt");
}

fn unicode_words_korean(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/korean.txt");
fn unicode_words_korean(c: &mut Criterion) {
unicode_words(c, "korean", "benches/texts/korean.txt");
}

fn unicode_words_mandarin(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/mandarin.txt");
fn unicode_words_mandarin(c: &mut Criterion) {
unicode_words(c, "mandarin", "benches/texts/mandarin.txt");
}

fn unicode_words_russian(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/russian.txt");
fn unicode_words_russian(c: &mut Criterion) {
unicode_words(c, "russian", "benches/texts/russian.txt");
}

fn unicode_words_source_code(bench: &mut Bencher) {
unicode_words(bench, "benches/texts/source_code.txt");
fn unicode_words_source_code(c: &mut Criterion) {
unicode_words(c, "source_code", "benches/texts/source_code.txt");
}

benchmark_group!(
criterion_group!(
benches,
unicode_words_arabic,
unicode_words_english,
Expand All
@@ -61,4 +58,4 @@ benchmark_group!(
unicode_words_source_code,
);

benchmark_main!(benches);
criterion_main!(benches);
55 changes: 26 additions & 29 deletions benches/word_bounds.rs
@@ -1,55 +1,52 @@
#[macro_use]
extern crate bencher;
extern crate unicode_segmentation;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use bencher::Bencher;
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

fn word_bounds(bench: &mut Bencher, path: &str) {
fn word_bounds(c: &mut Criterion, lang: &str, path: &str) {
let text = fs::read_to_string(path).unwrap();
bench.iter(|| {
for w in text.split_word_bounds() {
bencher::black_box(w);
}
c.bench_function(&format!("word_bounds_{}", lang), |bench| {
bench.iter(|| {
for w in text.split_word_bounds() {
black_box(w);
}
});
});

bench.bytes = text.len() as u64;
}

fn word_bounds_arabic(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/arabic.txt");
fn word_bounds_arabic(c: &mut Criterion) {
word_bounds(c, "arabic", "benches/texts/arabic.txt");
}

fn word_bounds_english(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/english.txt");
fn word_bounds_english(c: &mut Criterion) {
word_bounds(c, "english", "benches/texts/english.txt");
}

fn word_bounds_hindi(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/hindi.txt");
fn word_bounds_hindi(c: &mut Criterion) {
word_bounds(c, "hindi", "benches/texts/hindi.txt");
}

fn word_bounds_japanese(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/japanese.txt");
fn word_bounds_japanese(c: &mut Criterion) {
word_bounds(c, "japanese", "benches/texts/japanese.txt");
}

fn word_bounds_korean(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/korean.txt");
fn word_bounds_korean(c: &mut Criterion) {
word_bounds(c, "korean", "benches/texts/korean.txt");
}

fn word_bounds_mandarin(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/mandarin.txt");
fn word_bounds_mandarin(c: &mut Criterion) {
word_bounds(c, "mandarin", "benches/texts/mandarin.txt");
}

fn word_bounds_russian(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/russian.txt");
fn word_bounds_russian(c: &mut Criterion) {
word_bounds(c, "russian", "benches/texts/russian.txt");
}

fn word_bounds_source_code(bench: &mut Bencher) {
word_bounds(bench, "benches/texts/source_code.txt");
fn word_bounds_source_code(c: &mut Criterion) {
word_bounds(c, "source_code", "benches/texts/source_code.txt");
}

benchmark_group!(
criterion_group!(
benches,
word_bounds_arabic,
word_bounds_english,
Expand All
@@ -61,4 +58,4 @@ benchmark_group!(
word_bounds_source_code,
);

benchmark_main!(benches);
criterion_main!(benches);

0 comments on commit 243af2c

Please sign in to comment.