Skip to content

Commit

Permalink
Add WASM support (#14)
Browse files (browse the repository at this point in the history)
  • Loading branch information
pemistahl committed Mar 25, 2022
1 parent 3219091 commit 1ef8280
Show file tree
Hide file tree
Showing 6 changed files with 365 additions and 10 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

/pkg/
/target/
**/*.rs.bk

Expand All @@ -31,6 +32,7 @@ out/
*.bak
*.tmp
*.class
*.html
.buildpath
.classpath
.vscode/*
Expand All @@ -49,4 +51,4 @@ $RECYCLE.BIN/
Desktop.ini
ehthumbs.db

src/main.rs
src/main.rs
74 changes: 73 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 10 additions & 4 deletions Cargo.toml
Expand Up @@ -17,7 +17,7 @@ members = ["language-models/*"]

[package]
name = "lingua"
version = "1.3.3"
version = "1.4.0-SNAPSHOT"
authors = ["Peter M. Stahl <pemistahl@gmail.com>"]
description = """
An accurate natural language detection library, suitable for long and short text alike
Expand All @@ -36,6 +36,9 @@ keywords = [
"nlp"
]

[lib]
crate-type = ["cdylib", "rlib"]

[dependencies]
fraction = "0.10.0"
include_dir = "0.7.2"
Expand Down Expand Up @@ -125,6 +128,9 @@ lingua-xhosa-language-model = { path = "language-models/xh", version = "=1.0.1",
lingua-yoruba-language-model = { path = "language-models/yo", version = "=1.0.1", optional = true }
lingua-zulu-language-model = { path = "language-models/zu", version = "=1.0.1", optional = true }

[target.'cfg(target_family = "wasm")'.dependencies]
wasm-bindgen = { version = "0.2", features = ["serde-serialize"] }

[dev-dependencies]
cld2 = "1.0.2"
float-cmp = "0.9.0"
Expand All @@ -136,8 +142,9 @@ titlecase = "1.1.0"
whatlang = "0.13.0"

[features]
default = [
"parallelism",
default = ["parallelism", "all-languages"]
parallelism = ["rayon"]
all-languages = [
"afrikaans", "albanian", "arabic", "armenian", "azerbaijani", "basque",
"belarusian", "bengali", "bokmal", "bosnian", "bulgarian", "catalan",
"chinese", "croatian", "czech", "danish", "dutch", "english", "esperanto",
Expand All @@ -151,7 +158,6 @@ default = [
"tsonga", "tswana", "turkish", "ukrainian", "urdu", "vietnamese",
"welsh", "xhosa", "yoruba", "zulu"
]
parallelism = ["rayon"]
afrikaans = ["lingua-afrikaans-language-model"]
albanian = ["lingua-albanian-language-model"]
arabic = ["lingua-arabic-language-model"]
Expand Down
12 changes: 8 additions & 4 deletions src/builder.rs
Expand Up @@ -19,7 +19,11 @@ use crate::isocode::{IsoCode639_1, IsoCode639_3};
use crate::language::Language;
use std::collections::HashSet;

const MISSING_LANGUAGE_MESSAGE: &str = "LanguageDetector needs at least 2 languages to choose from";
pub(crate) const MISSING_LANGUAGE_MESSAGE: &str =
"LanguageDetector needs at least 2 languages to choose from";

pub(crate) const MINIMUM_RELATIVE_DISTANCE_MESSAGE: &str =
"Minimum relative distance must lie in between 0.0 and 0.99";

/// This struct configures and creates an instance of [LanguageDetector].
pub struct LanguageDetectorBuilder {
Expand Down Expand Up @@ -141,7 +145,7 @@ impl LanguageDetectorBuilder {
/// ⚠ Panics if `distance` is smaller than 0.0 or greater than 0.99.
pub fn with_minimum_relative_distance(&mut self, distance: f64) -> &mut Self {
if !(0.0..=0.99).contains(&distance) {
panic!("minimum relative distance must lie in between 0.0 and 0.99");
panic!("{}", MINIMUM_RELATIVE_DISTANCE_MESSAGE);
}
self.minimum_relative_distance = distance;
self
Expand Down Expand Up @@ -303,13 +307,13 @@ mod tests {
}

#[test]
#[should_panic(expected = "minimum relative distance must lie in between 0.0 and 0.99")]
#[should_panic(expected = "Minimum relative distance must lie in between 0.0 and 0.99")]
fn assert_detector_cannot_be_built_from_too_small_minimum_relative_distance() {
LanguageDetectorBuilder::from_all_languages().with_minimum_relative_distance(-2.3);
}

#[test]
#[should_panic(expected = "minimum relative distance must lie in between 0.0 and 0.99")]
#[should_panic(expected = "Minimum relative distance must lie in between 0.0 and 0.99")]
fn assert_detector_cannot_be_built_from_too_large_minimum_relative_distance() {
LanguageDetectorBuilder::from_all_languages().with_minimum_relative_distance(1.7);
}
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Expand Up @@ -281,6 +281,9 @@ mod model;
mod ngram;
mod writer;

#[cfg(target_family = "wasm")]
mod wasm;

pub use builder::LanguageDetectorBuilder;
pub use detector::LanguageDetector;
pub use isocode::{IsoCode639_1, IsoCode639_3};
Expand Down

0 comments on commit 1ef8280

Please sign in to comment.