From 1bbe2c40cc42927f75f597cd7377377e58974c85 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 1 Oct 2023 06:30:44 -0400 Subject: [PATCH 1/2] raw_detect perf tweaks - applied clippy::needless_for_each lint (see https://rust-lang.github.io/rust-clippy/master/index.html#needless_for_each) - declare scores vec with_capacity to minimize allocs - explicitly declared vars (benchmarks show it helped) --- src/combined/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/combined/mod.rs b/src/combined/mod.rs index 10cbc53..f8c4c39 100644 --- a/src/combined/mod.rs +++ b/src/combined/mod.rs @@ -33,25 +33,25 @@ pub fn detect(iquery: &InternalQuery) -> Option { // TODO: optimize! pub fn raw_detect(iquery: &InternalQuery) -> RawOutcome { - let alphabet_raw_outcome = alphabets::raw_detect(iquery); - let trigram_raw_outcome = trigrams::raw_detect(iquery); + let alphabet_raw_outcome: alphabets::RawOutcome = alphabets::raw_detect(iquery); + let trigram_raw_outcome: trigrams::RawOutcome = trigrams::raw_detect(iquery); - let alphabet_scores = &alphabet_raw_outcome.scores; - let trigram_scores = &trigram_raw_outcome.scores; + let alphabet_scores: &Vec<(Lang, f64)> = &alphabet_raw_outcome.scores; + let trigram_scores: &Vec<(Lang, f64)> = &trigram_raw_outcome.scores; let mut all_langs: Vec<Lang> = alphabet_scores.iter().map(|x| x.0).collect(); - trigram_scores.iter().for_each(|(lang, _)| { + for (lang, _) in trigram_scores.iter() { if !all_langs.contains(lang) { all_langs.push(*lang); } - }); + } let count = alphabet_raw_outcome.count; let alphabet_weight = calc_alphabet_weight(count); let trigram_weight = 1.0 - alphabet_weight; - let mut scores = vec![]; + let mut scores = Vec::with_capacity(alphabet_scores.len() + trigram_scores.len()); for lang in all_langs { let a: f64 = alphabet_scores From 25888fec3c93b3502e4faa8c3802d842fb976a4e Mon Sep 17 00:00:00 2001 From: Joel Natividad 
<1980690+jqnatividad@users.noreply.github.com> Date: Thu, 4 Jan 2024 05:49:10 -0500 Subject: [PATCH 2/2] Update src/combined/mod.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ænжej Theodorko --- src/combined/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/combined/mod.rs b/src/combined/mod.rs index f8c4c39..3ad69bc 100644 --- a/src/combined/mod.rs +++ b/src/combined/mod.rs @@ -51,7 +51,7 @@ pub fn raw_detect(iquery: &InternalQuery) -> RawOutcome { let alphabet_weight = calc_alphabet_weight(count); let trigram_weight = 1.0 - alphabet_weight; - let mut scores = Vec::with_capacity(alphabet_scores.len() + trigram_scores.len()); + let mut scores = Vec::with_capacity(all_langs.len()); for lang in all_langs { let a: f64 = alphabet_scores