diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index 7ad912cb..388e841b 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -118,6 +118,7 @@ def from_bytes( fallback_ascii = None # type: Optional[CharsetMatch] fallback_u8 = None # type: Optional[CharsetMatch] + fallback_specified = None # type: Optional[CharsetMatch] single_byte_hard_failure_count = 0 # type: int single_byte_soft_failure_count = 0 # type: int @@ -255,7 +256,7 @@ def from_bytes( early_stop_count, round(mean_mess_ratio * 100, ndigits=3)) # Preparing those fallbacks in case we got nothing. - if encoding_iana in ["ascii", "utf_8"]: + if encoding_iana in ["ascii", "utf_8", specified_encoding]: fallback_entry = CharsetMatch( sequences, encoding_iana, @@ -264,7 +265,9 @@ def from_bytes( [], decoded_payload ) - if encoding_iana == "ascii": + if encoding_iana == specified_encoding: + fallback_specified = fallback_entry + elif encoding_iana == "ascii": fallback_ascii = fallback_entry else: fallback_u8 = fallback_entry @@ -332,10 +335,13 @@ def from_bytes( ) if len(results) == 0: - if fallback_u8 or fallback_ascii: - logger.warning("Nothing got out of the detection process. Using ASCII/UTF-8 fallback.") + if fallback_u8 or fallback_ascii or fallback_specified: + logger.warning("Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.") - if (fallback_u8 and fallback_ascii is None) or (fallback_u8 and fallback_u8.fingerprint != fallback_ascii.fingerprint): + if fallback_specified: + logger.warning("%s will be used as a fallback match", fallback_specified.encoding) + results.append(fallback_specified) + elif (fallback_u8 and fallback_ascii is None) or (fallback_u8 and fallback_u8.fingerprint != fallback_ascii.fingerprint): logger.warning("utf_8 will be used as a fallback match") results.append(fallback_u8) elif fallback_ascii: