Skip to content

Commit

Permalink
Merge pull request #38 from Jules-Bertholet/refactor-tests
Browse files Browse the repository at this point in the history
Refactor tests
  • Loading branch information
Manishearth committed Apr 23, 2024
2 parents 9c4477c + 49ef069 commit 00ee4b0
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 139 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/rust.yml
Expand Up @@ -7,7 +7,11 @@ on:
branches: [ "master" ]

env:
CARGO_INCREMENTAL: 0
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
RUSTFLAGS: -D warnings
RUSTDOCFLAGS: -D warnings

jobs:
build:
Expand All @@ -18,6 +22,12 @@ jobs:
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
- name: Build docs
run: cargo doc
- name: Check formatting
run: cargo fmt --check
- name: Check clippy
run: cargo clippy --lib --tests
regen:
runs-on: ubuntu-latest
steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -3,3 +3,4 @@ Cargo.lock
scripts/tmp
scripts/*.txt
scripts/*.rs
bench_data/*
9 changes: 6 additions & 3 deletions Cargo.toml
Expand Up @@ -2,7 +2,10 @@

name = "unicode-width"
version = "0.1.11"
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
authors = [
"kwantam <kwantam@gmail.com>",
"Manish Goregaokar <manishsmail@gmail.com>",
]

homepage = "https://github.com/unicode-rs/unicode-width"
repository = "https://github.com/unicode-rs/unicode-width"
Expand All @@ -14,8 +17,9 @@ description = """
Determine displayed width of `char` and `str` types
according to Unicode Standard Annex #11 rules.
"""
edition = "2021"

exclude = [ "target/*", "Cargo.lock" ]
exclude = ["target/*", "Cargo.lock"]

[dependencies]
std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
Expand All @@ -27,7 +31,6 @@ unicode-normalization = "0.1.23"

[features]
default = []
bench = []
rustc-dep-of-std = ['std', 'core', 'compiler_builtins']

# Legacy, now a no-op
Expand Down
106 changes: 106 additions & 0 deletions benches/benches.rs
@@ -0,0 +1,106 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(test)]

extern crate test;

use std::iter;

use test::Bencher;

use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

/// Benchmarks `UnicodeWidthChar::width`, called through the trait path,
/// over a string of 4096 ASCII `'a'` characters.
#[bench]
fn cargo(b: &mut Bencher) {
    let text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in text.chars() {
            // `black_box` keeps the optimizer from discarding the computed width.
            test::black_box(UnicodeWidthChar::width(ch));
        }
    });
}

/// Benchmarks the method-call form `c.width()` over a string of 4096 ASCII
/// `'a'` characters.
// NOTE(review): the reason for `allow(deprecated)` is not visible in this file;
// confirm whether the attribute is still required.
#[bench]
#[allow(deprecated)]
fn stdlib(b: &mut Bencher) {
    let text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in text.chars() {
            // `black_box` keeps the optimizer from discarding the computed width.
            test::black_box(ch.width());
        }
    });
}

/// Benchmarks the `if`-based fast path (`simple_width_if`) over a string of
/// 4096 ASCII `'a'` characters.
#[bench]
fn simple_if(b: &mut Bencher) {
    let text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in text.chars() {
            // `black_box` keeps the optimizer from discarding the computed width.
            test::black_box(simple_width_if(ch));
        }
    });
}

/// Benchmarks the `match`-based fast path (`simple_width_match`) over a string
/// of 4096 ASCII `'a'` characters.
#[bench]
fn simple_match(b: &mut Bencher) {
    let text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in text.chars() {
            // `black_box` keeps the optimizer from discarding the computed width.
            test::black_box(simple_width_match(ch));
        }
    });
}

/// Width lookup with a hand-written `if` fast path for code points below 127.
///
/// Returns `Some(0)` for U+0000, `None` for U+0001..=U+001F, and `Some(1)` for
/// U+0020..=U+007E. Every other character is forwarded to
/// `UnicodeWidthChar::width`.
#[inline]
fn simple_width_if(c: char) -> Option<usize> {
    let code = u32::from(c);

    // Anything at or above 127 takes the general lookup.
    if code >= 127 {
        return UnicodeWidthChar::width(c);
    }

    if code > 31 {
        Some(1)
    } else if code == 0 {
        Some(0)
    } else {
        None
    }
}

/// Width lookup with a hand-written `match` fast path for code points below
/// 0x7F.
///
/// Returns `Some(0)` for U+0000, `None` for U+0001..=U+001F, and `Some(1)` for
/// U+0020..=U+007E. Every other character is forwarded to
/// `UnicodeWidthChar::width`.
#[inline]
fn simple_width_match(c: char) -> Option<usize> {
    match u32::from(c) {
        0 => Some(0),
        0x01..=0x1f => None,
        0x20..=0x7e => Some(1),
        _ => UnicodeWidthChar::width(c),
    }
}

/// Benchmarks `UnicodeWidthStr::width` over the contents of `bench_data/enwik8`.
///
/// If the file cannot be read as UTF-8 text, the benchmark runs over an empty
/// string instead of failing.
#[bench]
fn enwik8(b: &mut Bencher) {
    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
    let corpus: String = std::fs::read_to_string("bench_data/enwik8").unwrap_or_default();

    b.iter(|| {
        let width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(width)
    });
}

/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/jawiki-20220501-pages-articles-multistream-index.txt`.
///
/// If the file cannot be read as UTF-8 text, the benchmark runs over an empty
/// string instead of failing.
#[bench]
fn jawiki(b: &mut Bencher) {
    // To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
    // https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
    let corpus: String =
        std::fs::read_to_string("bench_data/jawiki-20220501-pages-articles-multistream-index.txt")
            .unwrap_or_default();

    b.iter(|| {
        let width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(width)
    });
}
11 changes: 0 additions & 11 deletions src/lib.rs
Expand Up @@ -47,24 +47,13 @@
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
)]
#![cfg_attr(feature = "bench", feature(test))]
#![no_std]

#[cfg(test)]
#[macro_use]
extern crate std;

#[cfg(feature = "bench")]
extern crate test;

use tables::charwidth as cw;
pub use tables::UNICODE_VERSION;

mod tables;

#[cfg(test)]
mod tests;

/// Methods for determining displayed width of Unicode characters.
pub trait UnicodeWidthChar {
/// Returns the character's displayed width in columns, or `None` if the
Expand Down

0 comments on commit 00ee4b0

Please sign in to comment.