diff --git a/chardet/sbcharsetprober.py b/chardet/sbcharsetprober.py index f47fa0b7..d4a0365c 100644 --- a/chardet/sbcharsetprober.py +++ b/chardet/sbcharsetprober.py @@ -99,12 +99,12 @@ def feed(self, byte_str): language_model = self._model.language_model for char in byte_str: order = char_to_order_map.get(char, CharacterCategory.UNDEFINED) - # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but - # CharacterCategory.SYMBOL is actually 253, so we use CONTROL - # to make it closer to the original intent. The only difference - # is whether or not we count digits and control characters for - # _total_char purposes. - if order < CharacterCategory.CONTROL: + # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but + # CharacterCategory.SYMBOL is actually 253, so we use CONTROL + # to make it closer to the original intent. The only difference + # is whether or not we count digits and control characters for + # _total_char purposes. + if order < CharacterCategory.CONTROL: self._total_char += 1 # TODO: Follow uchardet's lead and discount confidence for frequent # control characters.