From 023e7eaecff492d817a0f496e1b0641f2e29927b Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Wed, 13 Jul 2022 08:04:50 -0700 Subject: [PATCH] Distinguish between final and intermediate ResultDict (#266) The IntermediateResultDict has optional fields, whereas in the FinalResultDict no field can be None. Carried over from the typeshed types: https://github.com/python/typeshed/blob/ee09a67c5cf61effaae1bdb894571eac4fc9ac6b/stubs/chardet/chardet/__init__.pyi --- chardet/__init__.py | 8 ++++---- chardet/resultdict.py | 10 ++++++++-- chardet/universaldetector.py | 12 ++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/chardet/__init__.py b/chardet/__init__.py index 2112ee14..69afe7ef 100644 --- a/chardet/__init__.py +++ b/chardet/__init__.py @@ -20,14 +20,14 @@ from .charsetgroupprober import CharSetGroupProber from .charsetprober import CharSetProber from .enums import InputState -from .resultdict import ResultDict +from .resultdict import FinalResultDict, IntermediateResultDict from .universaldetector import UniversalDetector from .version import VERSION, __version__ __all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"] -def detect(byte_str: Union[bytes, bytearray]) -> ResultDict: +def detect(byte_str: Union[bytes, bytearray]) -> FinalResultDict: """ Detect the encoding of the given byte string. @@ -47,7 +47,7 @@ def detect(byte_str: Union[bytes, bytearray]) -> ResultDict: def detect_all( byte_str: Union[bytes, bytearray], ignore_threshold: bool = False -) -> List[ResultDict]: +) -> List[IntermediateResultDict]: """ Detect all the possible encodings of the given byte string. 
@@ -70,7 +70,7 @@ def detect_all( detector.close() if detector.input_state == InputState.HIGH_BYTE: - results: List[ResultDict] = [] + results: List[IntermediateResultDict] = [] probers: List[CharSetProber] = [] for prober in detector.charset_probers: if isinstance(prober, CharSetGroupProber): diff --git a/chardet/resultdict.py b/chardet/resultdict.py index 7d36e64c..bf889be3 100644 --- a/chardet/resultdict.py +++ b/chardet/resultdict.py @@ -7,10 +7,16 @@ # for Python 3.7. from typing import TypedDict - class ResultDict(TypedDict): + class FinalResultDict(TypedDict): + encoding: str + confidence: float + language: str + + class IntermediateResultDict(TypedDict): encoding: Optional[str] confidence: float language: Optional[str] else: - ResultDict = dict + FinalResultDict = dict + IntermediateResultDict = dict diff --git a/chardet/universaldetector.py b/chardet/universaldetector.py index f94961e6..b3658474 100644 --- a/chardet/universaldetector.py +++ b/chardet/universaldetector.py @@ -39,7 +39,7 @@ class a user of ``chardet`` should use. import codecs import logging import re -from typing import List, Optional, Union +from typing import List, Optional, Union, cast from .charsetgroupprober import CharSetGroupProber from .charsetprober import CharSetProber @@ -48,7 +48,7 @@ class a user of ``chardet`` should use. 
from .latin1prober import Latin1Prober from .macromanprober import MacRomanProber from .mbcsgroupprober import MBCSGroupProber -from .resultdict import ResultDict +from .resultdict import FinalResultDict, IntermediateResultDict from .sbcsgroupprober import SBCSGroupProber from .utf1632prober import UTF1632Prober @@ -89,7 +89,7 @@ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.ALL) -> None: self._esc_charset_prober: Optional[EscCharSetProber] = None self._utf1632_prober: Optional[UTF1632Prober] = None self._charset_probers: List[CharSetProber] = [] - self.result: ResultDict = { + self.result: IntermediateResultDict = { "encoding": None, "confidence": 0.0, "language": None, @@ -263,7 +263,7 @@ def feed(self, byte_str: Union[bytes, bytearray]) -> None: if self.WIN_BYTE_DETECTOR.search(byte_str): self._has_win_bytes = True - def close(self) -> ResultDict: + def close(self) -> FinalResultDict: """ Stop analyzing the current document and come up with a final prediction. @@ -273,7 +273,7 @@ def close(self) -> ResultDict: """ # Don't bother with checks if we're already done if self.done: - return self.result + return cast(FinalResultDict, self.result) self.done = True if not self._got_data: @@ -335,4 +335,4 @@ def close(self) -> ResultDict: group_prober.language, group_prober.get_confidence(), ) - return self.result + return cast(FinalResultDict, self.result)