Skip to content

Commit

Permalink
Distinguish between final and intermediate ResultDict (#266)
Browse files Browse the repository at this point in the history
The IntermediateResultDict has optional fields, whereas in the
FinalResultDict no field can be None.

Carried over from the typeshed types:
https://github.com/python/typeshed/blob/ee09a67c5cf61effaae1bdb894571eac4fc9ac6b/stubs/chardet/chardet/__init__.pyi
  • Loading branch information
jdufresne committed Jul 13, 2022
1 parent 64ddfd6 commit 023e7ea
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
8 changes: 4 additions & 4 deletions chardet/__init__.py
Expand Up @@ -20,14 +20,14 @@
from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
from .enums import InputState
from .resultdict import ResultDict
from .resultdict import FinalResultDict, IntermediateResultDict
from .universaldetector import UniversalDetector
from .version import VERSION, __version__

__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]


def detect(byte_str: Union[bytes, bytearray]) -> ResultDict:
def detect(byte_str: Union[bytes, bytearray]) -> FinalResultDict:
"""
Detect the encoding of the given byte string.
Expand All @@ -47,7 +47,7 @@ def detect(byte_str: Union[bytes, bytearray]) -> ResultDict:

def detect_all(
byte_str: Union[bytes, bytearray], ignore_threshold: bool = False
) -> List[ResultDict]:
) -> List[IntermediateResultDict]:
"""
Detect all the possible encodings of the given byte string.
Expand All @@ -70,7 +70,7 @@ def detect_all(
detector.close()

if detector.input_state == InputState.HIGH_BYTE:
results: List[ResultDict] = []
results: List[IntermediateResultDict] = []
probers: List[CharSetProber] = []
for prober in detector.charset_probers:
if isinstance(prober, CharSetGroupProber):
Expand Down
10 changes: 8 additions & 2 deletions chardet/resultdict.py
Expand Up @@ -7,10 +7,16 @@
# for Python 3.7.
from typing import TypedDict

class ResultDict(TypedDict):
class FinalResultDict(TypedDict):
encoding: str
confidence: float
language: str

class IntermediateResultDict(TypedDict):
encoding: Optional[str]
confidence: float
language: Optional[str]

else:
ResultDict = dict
FinalResultDict = dict
IntermediateResultDict = dict
12 changes: 6 additions & 6 deletions chardet/universaldetector.py
Expand Up @@ -39,7 +39,7 @@ class a user of ``chardet`` should use.
import codecs
import logging
import re
from typing import List, Optional, Union
from typing import List, Optional, Union, cast

from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
Expand All @@ -48,7 +48,7 @@ class a user of ``chardet`` should use.
from .latin1prober import Latin1Prober
from .macromanprober import MacRomanProber
from .mbcsgroupprober import MBCSGroupProber
from .resultdict import ResultDict
from .resultdict import FinalResultDict, IntermediateResultDict
from .sbcsgroupprober import SBCSGroupProber
from .utf1632prober import UTF1632Prober

Expand Down Expand Up @@ -89,7 +89,7 @@ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.ALL) -> None:
self._esc_charset_prober: Optional[EscCharSetProber] = None
self._utf1632_prober: Optional[UTF1632Prober] = None
self._charset_probers: List[CharSetProber] = []
self.result: ResultDict = {
self.result: IntermediateResultDict = {
"encoding": None,
"confidence": 0.0,
"language": None,
Expand Down Expand Up @@ -263,7 +263,7 @@ def feed(self, byte_str: Union[bytes, bytearray]) -> None:
if self.WIN_BYTE_DETECTOR.search(byte_str):
self._has_win_bytes = True

def close(self) -> ResultDict:
def close(self) -> FinalResultDict:
"""
Stop analyzing the current document and come up with a final
prediction.
Expand All @@ -273,7 +273,7 @@ def close(self) -> ResultDict:
"""
# Don't bother with checks if we're already done
if self.done:
return self.result
return cast(FinalResultDict, self.result)
self.done = True

if not self._got_data:
Expand Down Expand Up @@ -335,4 +335,4 @@ def close(self) -> ResultDict:
group_prober.language,
group_prober.get_confidence(),
)
return self.result
return cast(FinalResultDict, self.result)

0 comments on commit 023e7ea

Please sign in to comment.