Skip to content

Commit

Permalink
❇️ Adjust the MD to lower the sensitivity around certain CSVs (#69)
Browse files Browse the repository at this point in the history
* ❇️ Adjust the MD to lower the sensitivity around certain CSVs

...having many columns.

* 🔧 ✔️ 0.01 MD ratio diff on single subtest is OK

* 🔧 Remove tab duplicate, using vertical tab instead
  • Loading branch information
Ousret committed Jul 23, 2021
1 parent 87a5a98 commit 9fc6985
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions charset_normalizer/md.py
Expand Up @@ -56,7 +56,7 @@ def eligible(self, character: str) -> bool:
def feed(self, character: str) -> None:
self._character_count += 1

- if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";", "{", "}", "[", "]"]:
+ if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";", "{", "}", "[", "]", ",", "|", '"']:
if is_punctuation(character):
self._punctuation_count += 1
elif character.isdigit() is False and is_symbol(character):
Expand Down Expand Up @@ -116,7 +116,7 @@ def eligible(self, character: str) -> bool:
return True

def feed(self, character: str) -> None:
- if character not in {'\n', '\t', '\r'} and character.isprintable() is False:
+ if character not in {'\n', '\t', '\r', '\v'} and character.isprintable() is False:
self._unprintable_count += 1
self._character_count += 1

Expand Down
2 changes: 1 addition & 1 deletion tests/test_probe_chaos.py
Expand Up @@ -40,7 +40,7 @@ def test_subtle_gibberish(self):

self.assertGreater(
mess_ratio("´Á¥½³ø§i -- ±i®Ìºû, ³¯·Ø©v"),
- 0.8
+ 0.7
)

self.assertGreater(
Expand Down

0 comments on commit 9fc6985

Please sign in to comment.