From 370d9ee14ed95931e26c50d5606682580a703457 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sun, 14 Aug 2022 19:20:59 +0200
Subject: [PATCH] :art: Enable strict type check and improve the project
 typing

Following https://github.com/Ousret/charset_normalizer/issues/182
---
 .github/workflows/lint.yml           |  2 +-
 charset_normalizer/api.py            | 22 +++++++++++-----------
 charset_normalizer/cd.py             |  4 ++--
 charset_normalizer/cli/normalizer.py |  4 ++--
 charset_normalizer/models.py         |  4 ++--
 charset_normalizer/utils.py          | 14 ++++++++++----
 6 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 02e770ca..877b890e 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -28,7 +28,7 @@ jobs:
         python setup.py install
     - name: Type checking (Mypy)
       run: |
-        mypy charset_normalizer
+        mypy --strict charset_normalizer
     - name: Import sorting check (isort)
       run: |
         isort --check charset_normalizer
diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py
index ae08361b..621d1fce 100644
--- a/charset_normalizer/api.py
+++ b/charset_normalizer/api.py
@@ -1,7 +1,7 @@
 import logging
 from os import PathLike
 from os.path import basename, splitext
-from typing import BinaryIO, List, Optional, Set
+from typing import Any, BinaryIO, List, Optional, Set
 
 from .cd import (
     coherence_ratio,
@@ -36,8 +36,8 @@ def from_bytes(
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.2,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
@@ -486,8 +486,8 @@ def from_fp(
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
@@ -508,12 +508,12 @@
 
 
 def from_path(
-    path: PathLike,
+    path: "PathLike[Any]",
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
@@ -535,12 +535,12 @@
 
 
 def normalize(
-    path: PathLike,
+    path: "PathLike[Any]",
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
 ) -> CharsetMatch:
     """
diff --git a/charset_normalizer/cd.py b/charset_normalizer/cd.py
index 8998bb54..7d119f6e 100644
--- a/charset_normalizer/cd.py
+++ b/charset_normalizer/cd.py
@@ -24,7 +24,7 @@ def encoding_unicode_range(iana_name: str) -> List[str]:
     if is_multi_byte_encoding(iana_name):
         raise IOError("Function not supported on multi-byte code page")
 
-    decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder  # type: ignore
+    decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder
 
     p: IncrementalDecoder = decoder(errors="ignore")
     seen_ranges: Dict[str, int] = {}
@@ -307,7 +307,7 @@ def coherence_ratio(
         lg_inclusion_list.remove("Latin Based")
 
     for layer in alpha_unicode_split(decoded_sequence):
-        sequence_frequencies: Counter = Counter(layer)
+        sequence_frequencies: Counter[str] = Counter(layer)
         most_common = sequence_frequencies.most_common()
 
         character_count: int = sum(o for c, o in most_common)
diff --git a/charset_normalizer/cli/normalizer.py b/charset_normalizer/cli/normalizer.py
index 540e5e2a..b8b652a5 100644
--- a/charset_normalizer/cli/normalizer.py
+++ b/charset_normalizer/cli/normalizer.py
@@ -3,7 +3,7 @@
 from json import dumps
 from os.path import abspath
 from platform import python_version
-from typing import List
+from typing import List, Optional
 
 try:
     from unicodedata2 import unidata_version
@@ -48,7 +48,7 @@ def query_yes_no(question: str, default: str = "yes") -> bool:
         sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
 
 
-def cli_detect(argv: List[str] = None) -> int:
+def cli_detect(argv: Optional[List[str]] = None) -> int:
     """
     CLI assistant using ARGV and ArgumentParser
     :param argv:
diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index b9d71eb4..ddf633d3 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -95,7 +95,7 @@ def coherence_non_latin(self) -> float:
         return 0.0
 
     @property
-    def w_counter(self) -> Counter:
+    def w_counter(self) -> Counter[str]:
         """
         Word counter instance on decoded text.
         Notice: Will be removed in 3.0
@@ -280,7 +280,7 @@ class CharsetMatches:
     Act like a list(iterable) but does not implements all related methods.
     """
 
-    def __init__(self, results: List[CharsetMatch] = None):
+    def __init__(self, results: Optional[List[CharsetMatch]] = None):
         self._results: List[CharsetMatch] = sorted(results) if results else []
 
     def __iter__(self) -> Iterator[CharsetMatch]:
diff --git a/charset_normalizer/utils.py b/charset_normalizer/utils.py
index 0640deb0..f12edf4d 100644
--- a/charset_normalizer/utils.py
+++ b/charset_normalizer/utils.py
@@ -13,7 +13,7 @@
 from re import findall
 from typing import Generator, List, Optional, Set, Tuple, Union
 
-from _multibytecodec import MultibyteIncrementalDecoder  # type: ignore
+from _multibytecodec import MultibyteIncrementalDecoder
 
 from .constant import (
     ENCODING_MARKS,
@@ -231,6 +231,9 @@ def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional
     for specified_encoding in results:
         specified_encoding = specified_encoding.lower().replace("-", "_")
 
+        encoding_alias: str
+        encoding_iana: str
+
         for encoding_alias, encoding_iana in aliases.items():
             if encoding_alias == specified_encoding:
                 return encoding_iana
@@ -256,7 +259,7 @@ def is_multi_byte_encoding(name: str) -> bool:
         "utf_32_be",
         "utf_7",
     } or issubclass(
-        importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,  # type: ignore
+        importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
         MultibyteIncrementalDecoder,
     )
 
@@ -286,6 +289,9 @@ def should_strip_sig_or_bom(iana_encoding: str) -> bool:
 def iana_name(cp_name: str, strict: bool = True) -> str:
     cp_name = cp_name.lower().replace("-", "_")
 
+    encoding_alias: str
+    encoding_iana: str
+
     for encoding_alias, encoding_iana in aliases.items():
         if cp_name in [encoding_alias, encoding_iana]:
             return encoding_iana
@@ -315,8 +321,8 @@ def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
     if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
         return 0.0
 
-    decoder_a = importlib.import_module("encodings.{}".format(iana_name_a)).IncrementalDecoder  # type: ignore
-    decoder_b = importlib.import_module("encodings.{}".format(iana_name_b)).IncrementalDecoder  # type: ignore
+    decoder_a = importlib.import_module("encodings.{}".format(iana_name_a)).IncrementalDecoder
+    decoder_b = importlib.import_module("encodings.{}".format(iana_name_b)).IncrementalDecoder
 
     id_a: IncrementalDecoder = decoder_a(errors="ignore")
     id_b: IncrementalDecoder = decoder_b(errors="ignore")
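
Note (illustration only, not part of the patch): the change repeated across
this diff replaces implicit-Optional defaults such as
`cp_isolation: List[str] = None` with explicit `Optional[List[str]]`, and
gives bare generics such as `Counter` a type parameter. Below is a minimal,
self-contained sketch of what `mypy --strict` rejects and accepts; the
function names `before` and `after` are hypothetical:

    from collections import Counter
    from typing import List, Optional


    def before(cp_isolation: List[str] = None) -> None:
        # mypy --strict: error: Incompatible default for argument
        # "cp_isolation" (default has type "None", argument has type
        # "List[str]")  [assignment]
        ...


    def after(cp_isolation: Optional[List[str]] = None) -> None:
        # Accepted: None is now part of the declared parameter type.
        ...


    # A bare `Counter` annotation fails --disallow-any-generics (enabled
    # by --strict); the parametrized form states the key type explicitly.
    # The annotation is quoted so it also runs on Pythons where
    # collections.Counter is not subscriptable at runtime (< 3.9).
    sequence_frequencies: "Counter[str]" = Counter("abcabc")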