From 595514e56a9756ba033c0b3f78e466e8407e0b32 Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Wed, 14 Jul 2021 12:16:08 -0700
Subject: [PATCH] :bug: Fix too small JSON payload triggering md with high ratio (#59)

---
 charset_normalizer/md.py |  2 +-
 tests/test_on_byte.py    | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/charset_normalizer/md.py b/charset_normalizer/md.py
index af0610a2..1dbd8d82 100644
--- a/charset_normalizer/md.py
+++ b/charset_normalizer/md.py
@@ -56,7 +56,7 @@ def eligible(self, character: str) -> bool:
     def feed(self, character: str) -> None:
         self._character_count += 1
 
-        if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";"]:
+        if character != self._last_printable_char and character not in ["<", ">", "=", ":", "/", "&", ";", "{", "}", "[", "]"]:
             if is_punctuation(character):
                 self._punctuation_count += 1
             elif character.isdigit() is False and is_symbol(character):
diff --git a/tests/test_on_byte.py b/tests/test_on_byte.py
index 945fff90..ed8a9173 100644
--- a/tests/test_on_byte.py
+++ b/tests/test_on_byte.py
@@ -27,6 +27,18 @@ def test_empty_bytes(self):
                 len(r.alphabets)
             )
 
+    def test_on_empty_json(self):
+
+        with self.subTest("Detecting empty JSON as ASCII"):
+            results = from_bytes(b"{}").best()
+            self.assertIsNotNone(
+                results.best()
+            )
+            self.assertEqual(
+                results.best().encoding,
+                "ascii"
+            )
+
     def test_bom_detection(self):
         with self.subTest('GB18030 UNAVAILABLE SIG'):
             self.assertFalse(