-
Notifications
You must be signed in to change notification settings - Fork 108
/
dev.rs
62 lines (54 loc) · 2.18 KB
/
dev.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
//! This module exposes some internal APIs.
//! It exists only to enable tuning of the library with extra supporting tools (e.g. benchmarks).
//! Developers are advised against relying on this API.
//!
pub use crate::alphabets::{raw_detect as alphabets_raw_detect, RawOutcome as RawAlphabetsInfo};
pub use crate::combined::{raw_detect as combined_raw_detect, RawOutcome as RawCombinedInfo};
pub use crate::core::{detect, detect_lang, detect_with_options, Detector, Info, Method, Options};
pub use crate::lang::Lang;
pub use crate::scripts::{detect_script, raw_detect_script, RawScriptInfo, Script};
pub use crate::trigrams::{raw_detect as trigrams_raw_detect, RawOutcome as RawTrigramsInfo};
pub use crate::alphabets::cyrillic::alphabet_calculate_scores as alphabet_cyrillic_calculate_scores;
pub use crate::alphabets::latin::alphabet_calculate_scores as alphabet_latin_calculate_scores;
pub use crate::core::{FilterList, LowercaseText};
// private imports
use crate::core::detect::detect_lang_base_on_mandarin_script;
use crate::core::Query;
use crate::scripts::grouping::ScriptLangGroup;
/// Raw detection result returned by [`raw_detect`]: the script-level outcome together with
/// the language-level outcome, when one could be produced.
#[derive(Debug)]
pub struct RawInfo {
    /// Result of `raw_detect_script` for the analysed text.
    pub script_info: RawScriptInfo,

    /// Language-level result; `None` when `script_info` reports no main script.
    pub lang_info: Option<RawLangInfo>,
}
/// Language-level part of a raw detection, shaped by the language group of the main script.
#[derive(Debug)]
pub enum RawLangInfo {
    /// The main script maps to `ScriptLangGroup::One`; carries the language of that group.
    OneScript(Lang),

    /// The main script maps to `ScriptLangGroup::Multi`; carries the outcome of
    /// `combined_raw_detect` for that script.
    MultiScript(RawCombinedInfo),

    /// The main script maps to `ScriptLangGroup::Mandarin`; carries the language returned by
    /// `detect_lang_base_on_mandarin_script`.
    Mandarin(Lang),
}
/// Runs script detection and then language detection on `text`, returning the raw results
/// of both stages.
///
/// The language stage is queried with the default [`FilterList`] and [`Method::Combined`].
/// `lang_info` in the returned value is `None` when script detection yields no main script.
pub fn raw_detect(text: &str) -> RawInfo {
    let script_info = raw_detect_script(text);

    // Keep the default filter list in a named local so the query can borrow it.
    let default_filter = FilterList::default();
    let query = Query {
        text,
        filter_list: &default_filter,
        method: Method::Combined,
    };

    let main_script = script_info.main_script();
    let lang_info = main_script.map(|script| {
        let group = script.to_lang_group();
        match group {
            // The group itself already names the language.
            ScriptLangGroup::One(lang) => RawLangInfo::OneScript(lang),
            // Several candidate languages: run the combined detector for this script.
            ScriptLangGroup::Multi(multi_lang_script) => {
                let internal_query = query.to_internal(multi_lang_script);
                RawLangInfo::MultiScript(combined_raw_detect(&internal_query))
            }
            // Mandarin script: delegate to the dedicated helper.
            ScriptLangGroup::Mandarin => {
                let detected = detect_lang_base_on_mandarin_script(&query, &script_info);
                RawLangInfo::Mandarin(detected.lang())
            }
        }
    });

    RawInfo {
        script_info,
        lang_info,
    }
}