-
-
Notifications
You must be signed in to change notification settings - Fork 49
/
test_probe_chaos.py
80 lines (61 loc) · 3.01 KB
/
test_probe_chaos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# coding: utf-8
import unittest
from charset_normalizer.md import mess_ratio
class TestProbeChaos(unittest.TestCase):
def test_not_gibberish(self):
with self.subTest('Chinese Not Gibberish'):
self.assertLessEqual(
mess_ratio(
'典肇乎庚辰年十二月廿一,及己丑年二月十九,收各方語言二百五十,合逾七百萬目;二十大卷佔八成,單英文卷亦過二百萬。悉文乃天下有志共筆而成;有意助之,幾網路、隨纂作,大典茁焉。'),
0.
)
with self.subTest('Arabic Not Gibberish'):
self.assertEqual(
mess_ratio('العقلية , التنويم المغناطيسي و / أو الاقتراح'),
0.
)
with self.subTest('Arabic Styled Not Gibberish'):
self.assertEqual(
mess_ratio("RadoZ تـــعــــديــل الـــتــــوقــيــــت مـــن قــبــل"),
0.
)
def test_subtle_gibberish(self):
self.assertLessEqual(
mess_ratio("Cehennemin Sava■þ²s²'da kim?"),
0.5
)
self.assertGreaterEqual(
mess_ratio("Cehennemin Sava■þ²s²'da kim?"),
0.
)
self.assertGreater(
mess_ratio("´Á¥½³ø§i -- ±i®Ìºû, ³¯·Ø©v"),
0.7
)
self.assertGreater(
mess_ratio("ïstanbul, T■rkiye'nin en kalabal»k, iktisadi ve k■lt■rel aÓ»dan en —nemli"),
0.
)
self.assertLessEqual(
mess_ratio("ïstanbul, T■rkiye'nin en kalabal»k, iktisadi ve k■lt■rel aÓ»dan en —nemli"),
0.5
)
self.assertLessEqual(
mess_ratio("<i>Parce que Óa, c'est la vÕritable histoire de la rencontre avec votre Tante Robin.</i>"),
0.5
)
def test_complete_gibberish(self):
self.assertTrue(
mess_ratio("""ØĢØŠØģاØĶŲ ŲŲ ØĢŲ اŲŲاØģ ŲŲŲ
Ų
ا ØģŲŲŲØŠØģاØĶŲŲŲØ ØŊØđŲا ŲØģŲ
Øđ ØđŲ (ŲØąŲØŊŲ) ŲاŲØŪا؊Ų
""") >= 0.1,
)
self.assertTrue(
mess_ratio("""ÇáÚŞáíÉ , ÇáÊäæíã ÇáãÛäÇØíÓí æ / Ãæ ÇáÇŞÊÑÇÍ""") >= 0.1,
)
def test_part_gibberish(self):
self.assertGreater(
mess_ratio(
"""hishamkoc@yahoo.com ุชุฑุฌู
ููุฉ ููุดูููุงู
ุงููููููููุงูRadoZ ุชูููุนููููุฏูููู ุงููููุชูููููููููููููุช ู
ูููู ูููุจููู""", 0.5),
0.3
)
if __name__ == '__main__':
unittest.main()