diff --git a/chardet/sbcharsetprober.py b/chardet/sbcharsetprober.py index e29ddec2..a49e8160 100644 --- a/chardet/sbcharsetprober.py +++ b/chardet/sbcharsetprober.py @@ -104,21 +104,9 @@ def feed(self, byte_str): language_model = self._model.language_model for char in byte_str: order = char_to_order_map.get(char, CharacterCategory.UNDEFINED) - # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but - # CharacterCategory.SYMBOL is actually 253, so we use CONTROL - # to make it closer to the original intent. The only difference - # is whether or not we count digits and control characters for - # _total_char purposes. - if order < CharacterCategory.CONTROL: + if order < CharacterCategory.SYMBOL: self._total_char += 1 -<<<<<<< HEAD - if order < self.SAMPLE_SIZE: -======= - # TODO: Follow uchardet's lead and discount confidence for frequent - # control characters. - # See https://github.com/BYVoid/uchardet/commit/55b4f23971db61 if order < CharacterCategory.CONTROL: ->>>>>>> 36504df (Add SingleByteCharSetModel for use by new model training code) self._freq_char += 1 if self._last_order < CharacterCategory.CONTROL: self._total_seqs += 1