diff --git a/HISTORY.md b/HISTORY.md index 0331d187f7..d31dbe05a4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,11 @@ dev - \[Short description of non-trivial change.\] +**Dependencies** + +- Switch chardet for the MIT-licensed charset_normalizer to remove license + ambiguity for projects bundling requests. + 2.25.1 (2020-12-16) ------------------- diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index 68f36999ef..6f61fd16f7 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -697,8 +697,9 @@ Encodings When you receive a response, Requests makes a guess at the encoding to use for decoding the response when you access the :attr:`Response.text <requests.Response.text>` attribute. Requests will first check for an -encoding in the HTTP header, and if none is present, will use `chardet -<https://pypi.org/project/chardet/>`_ to attempt to guess the encoding. +encoding in the HTTP header, and if none is present, will use +`charset_normalizer <https://pypi.org/project/charset-normalizer/>`_ to attempt +to guess the encoding. The only time Requests will not do this is if no explicit charset is present in the HTTP headers **and** the ``Content-Type`` diff --git a/requests/__init__.py b/requests/__init__.py index f8f94295f9..9085319cc2 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -41,12 +41,12 @@ """ import urllib3 -import chardet +import charset_normalizer import warnings from .exceptions import RequestsDependencyWarning -def check_compatibility(urllib3_version, chardet_version): +def check_compatibility(urllib3_version, charset_normalizer_version): urllib3_version = urllib3_version.split('.') assert urllib3_version != ['dev'] # Verify urllib3 isn't installed from git. @@ -62,11 +62,11 @@ def check_compatibility(urllib3_version, chardet_version): assert minor >= 21 assert minor <= 26 - # Check chardet for compatibility. - major, minor, patch = chardet_version.split('.')[:3] + # Check charset_normalizer for compatibility. 
+ major, minor, patch = charset_normalizer_version.split('.')[:3] major, minor, patch = int(major), int(minor), int(patch) - # chardet >= 3.0.2, < 5.0.0 - assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0) + # charset_normalizer >= 1.3.5, < 2.0.0 + assert (1, 3, 5) <= (major, minor, patch) < (2, 0, 0) def _check_cryptography(cryptography_version): @@ -82,10 +82,10 @@ def _check_cryptography(cryptography_version): # Check imported dependencies for compatibility. try: - check_compatibility(urllib3.__version__, chardet.__version__) + check_compatibility(urllib3.__version__, charset_normalizer.__version__) except (AssertionError, ValueError): - warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported " - "version!".format(urllib3.__version__, chardet.__version__), + warnings.warn("urllib3 ({}) or charset_normalizer ({}) doesn't match a supported " + "version!".format(urllib3.__version__, charset_normalizer.__version__), RequestsDependencyWarning) # Attempt to enable urllib3's fallback for SNI support diff --git a/requests/compat.py b/requests/compat.py index 5de0769f50..0b14f5015c 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -8,7 +8,10 @@ Python 3. """ -import chardet +try: + import chardet +except ImportError: + import charset_normalizer as chardet import sys diff --git a/requests/help.py b/requests/help.py index e53d35ef6d..4cd6389f55 100644 --- a/requests/help.py +++ b/requests/help.py @@ -8,10 +8,19 @@ import idna import urllib3 -import chardet from . 
import __version__ as requests_version +try: + import charset_normalizer +except ImportError: + charset_normalizer = None + +try: + import chardet +except ImportError: + chardet = None + try: from urllib3.contrib import pyopenssl except ImportError: @@ -71,7 +80,12 @@ def info(): implementation_info = _implementation() urllib3_info = {'version': urllib3.__version__} - chardet_info = {'version': chardet.__version__} + charset_normalizer_info = {'version': None} + chardet_info = {'version': None} + if charset_normalizer: + charset_normalizer_info = {'version': charset_normalizer.__version__} + if chardet: + chardet_info = {'version': chardet.__version__} pyopenssl_info = { 'version': None, @@ -99,9 +113,11 @@ def info(): 'implementation': implementation_info, 'system_ssl': system_ssl_info, 'using_pyopenssl': pyopenssl is not None, + 'using_charset_normalizer': chardet is None, 'pyOpenSSL': pyopenssl_info, 'urllib3': urllib3_info, 'chardet': chardet_info, + 'charset_normalizer': charset_normalizer_info, 'cryptography': cryptography_info, 'idna': idna_info, 'requests': { diff --git a/requests/models.py b/requests/models.py index 34b672c289..f8d149dc48 100644 --- a/requests/models.py +++ b/requests/models.py @@ -726,7 +726,7 @@ def next(self): @property def apparent_encoding(self): - """The apparent encoding, provided by the chardet library.""" + """The apparent encoding, provided by the charset_normalizer or chardet libraries.""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -840,7 +840,7 @@ def text(self): """Content of the response, in unicode. If Response.encoding is None, encoding will be guessed using - ``chardet``. + ``charset_normalizer`` or ``chardet``. The encoding of the response content is determined based solely on HTTP headers, following RFC 2616 to the letter. 
If you can take advantage of @@ -888,7 +888,7 @@ def json(self, **kwargs): if not self.encoding and self.content and len(self.content) > 3: # No encoding set. JSON RFC 4627 section 3 states we should expect # UTF-8, -16 or -32. Detect which one to use; If the detection or - # decoding fails, fall back to `self.text` (using chardet to make + # decoding fails, fall back to `self.text` (using charset_normalizer or chardet to make # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: diff --git a/requests/packages.py b/requests/packages.py index 7232fe0ff7..9da77d70dd 100644 --- a/requests/packages.py +++ b/requests/packages.py @@ -3,12 +3,13 @@ # This code exists for backwards compatibility reasons. # I don't like it either. Just look the other way. :) -for package in ('urllib3', 'idna', 'chardet'): +for package, alias in (('urllib3', 'urllib3'), ('idna', 'idna'), ('charset_normalizer', 'chardet')): locals()[package] = __import__(package) + locals()[alias] = locals()[package] # This traversal is apparently necessary such that the identities are # preserved (requests.packages.urllib3.* is urllib3.*) for mod in list(sys.modules): if mod == package or mod.startswith(package + '.'): - sys.modules['requests.packages.' + mod] = sys.modules[mod] + sys.modules['requests.packages.' + mod.replace(package, alias)] = sys.modules[mod] # Kinda cool, though, right? diff --git a/setup.py b/setup.py index 552c66de69..3a890ae718 100755 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def run_tests(self): packages = ['requests'] requires = [ - 'chardet>=3.0.2,<5', + 'charset_normalizer>=1.3.5,<2', 'idna>=2.5,<3', 'urllib3>=1.21.1,<1.27', 'certifi>=2017.4.17' @@ -103,6 +103,7 @@ def run_tests(self): 'security': ['pyOpenSSL >= 0.14', 'cryptography>=1.3.4'], 'socks': ['PySocks>=1.5.6, !=1.5.7'], 'socks:sys_platform == "win32" and python_version == "2.7"': ['win_inet_pton'], + 'lgpl': ['chardet>=3.0.2,<5'] }, project_urls={ 'Documentation': 'https://requests.readthedocs.io',