Skip to content

Commit

Permalink
🐛 Fix too small JSON payload triggering md with high ratio (#59)
Browse files: browse the repository at this point in the history
  • Loading branch information
Ousret committed Jul 14, 2021
1 parent 53b2dab commit 595514e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion charset_normalizer/md.py
Expand Up @@ -56,7 +56,7 @@ def eligible(self, character: str) -> bool:
def feed(self, character: str) -> None:
self._character_count += 1

if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";"]:
if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";", "{", "}", "[", "]"]:
if is_punctuation(character):
self._punctuation_count += 1
elif character.isdigit() is False and is_symbol(character):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_on_byte.py
Expand Up @@ -27,6 +27,18 @@ def test_empty_bytes(self):
len(r.alphabets)
)

def test_on_empty_json(self):
    """Check that the minimal JSON document ``{}`` is detected as ASCII.

    Regression test for very small JSON payloads: a payload made only of
    structural characters must still produce a match, and that match must
    report the ``ascii`` encoding.
    """
    with self.subTest("Detecting empty JSON as ASCII"):
        # Resolve the best match exactly once. Calling .best() again on the
        # value returned by .best() would raise AttributeError on a missing
        # match instead of letting assertIsNotNone report the failure.
        best_guess = from_bytes(b"{}").best()

        self.assertIsNotNone(best_guess)
        self.assertEqual(best_guess.encoding, "ascii")

def test_bom_detection(self):
with self.subTest('GB18030 UNAVAILABLE SIG'):
self.assertFalse(
Expand Down

0 comments on commit 595514e

Please sign in to comment.