Skip to content

Commit

Permalink
Add common-use kanji detection for Japanese. greyblake#122
Browse files Browse the repository at this point in the history
  • Loading branch information
miiton committed Sep 11, 2022
1 parent cebf034 commit 508ae36
Show file tree
Hide file tree
Showing 3 changed files with 297 additions and 0 deletions.
25 changes: 25 additions & 0 deletions src/scripts/chars.rs
@@ -1,3 +1,5 @@
use super::common_use_kanji::is_common_use_kanji;

pub(crate) fn is_cyrillic(ch: char) -> bool {
matches!(ch,
'\u{0400}'..='\u{0484}'
Expand Down Expand Up @@ -85,6 +87,10 @@ pub(crate) fn is_hiragana(ch: char) -> bool {
matches!(ch, '\u{3040}'..='\u{309F}')
}

/// Returns `true` if `ch` is treated as a Japanese kanji.
///
/// This is a thin wrapper: the decision is delegated entirely to
/// `is_common_use_kanji` from the sibling `common_use_kanji` module, so only
/// characters that helper accepts are reported as kanji.
// NOTE(review): presumably the helper covers the official "common use"
// (Jōyō) kanji list — confirm against `common_use_kanji`.
pub(crate) fn is_kanji(ch: char) -> bool {
is_common_use_kanji(ch)
}

/// Returns `true` if `ch` lies in the inclusive code point range
/// U+30A0..=U+30FF (the Unicode Katakana block), and `false` otherwise.
pub(crate) fn is_katakana(ch: char) -> bool {
    // Both endpoints are part of the range.
    let katakana_block = '\u{30A0}'..='\u{30FF}';
    katakana_block.contains(&ch)
}
Expand Down Expand Up @@ -229,6 +235,25 @@ mod tests {
assert_eq!(is_hiragana('a'), false);
}

#[test]
fn test_is_mandarin() {
    // (input, expected) pairs: two Han characters that must be accepted,
    // followed by an ASCII letter, digit and punctuation that must not be.
    let cases = [
        ('東', true),
        ('东', true),
        ('a', false),
        ('1', false),
        ('_', false),
    ];

    for &(ch, expected) in cases.iter() {
        assert_eq!(is_mandarin(ch), expected, "is_mandarin({:?})", ch);
    }
}

#[test]
fn test_is_kanji() {
    // (input, expected) pairs. Only '東' is expected to be recognised; the
    // Han character '东', the hiragana 'あ' and the ASCII inputs must all be
    // rejected.
    let cases = [
        ('東', true),
        ('东', false),
        ('あ', false),
        ('a', false),
        ('1', false),
        ('_', false),
    ];

    for &(ch, expected) in cases.iter() {
        assert_eq!(is_kanji(ch), expected, "is_kanji({:?})", ch);
    }
}

#[test]
fn test_is_hangul() {
assert_eq!(is_hangul('ᄁ'), true);
Expand Down

0 comments on commit 508ae36

Please sign in to comment.