Switch LGPL'd chardet for MIT licensed charset_normalizer
Although using the (non-vendored) chardet library is fine for requests
itself, the story is a lot less clear for downstream projects that pick
up an LGPL dependency, particularly ones that might like to bundle
requests (and thus chardet) into a single binary -- think of something
similar to what docker-compose is doing. By including an LGPL'd module
it is no longer clear whether the resulting artefact must also be
LGPL'd.

By swapping this dependency out for one under MIT we remove all license
ambiguity.

As an "escape hatch" I have changed the code so that it will use chardet
first if it is installed, but we no longer depend upon it directly;
instead there is a new extra, `requests[lgpl]`. This should minimize the
impact on users, and give them an escape hatch if charset_normalizer
turns out to be not as good. (In my non-exhaustive tests it detected the
same encoding as chardet in every case I threw at it.)
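That comparison is easy to reproduce. A minimal sketch, assuming both libraries are installed (chardet e.g. via the new `requests[lgpl]` extra) and that charset_normalizer exposes the chardet-compatible `detect()` helper this commit relies on; the sample bytes are purely illustrative:

```python
# Compare the two detectors on the same payload; charset_normalizer's detect()
# mirrors chardet's API, which is what lets the rest of this commit treat the
# two modules interchangeably.
import chardet
import charset_normalizer

sample = "Comment ça va ? Très bien, merci. À bientôt mes amis préférés.".encode("latin-1")

print(chardet.detect(sample)["encoding"])             # e.g. ISO-8859-1 / windows-1252
print(charset_normalizer.detect(sample)["encoding"])  # expected to agree in most cases
```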
ashb committed Apr 22, 2021
1 parent c45a4df commit 65fe1ce
Showing 8 changed files with 47 additions and 20 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
@@ -6,6 +6,11 @@ dev

- \[Short description of non-trivial change.\]

+**Dependencies**
+
+- Switch chardet for the MIT-licensed charset_normalizer to remove license
+  ambiguity for projects bundling requests.

2.25.1 (2020-12-16)
-------------------

5 changes: 3 additions & 2 deletions docs/user/advanced.rst
@@ -697,8 +697,9 @@ Encodings
When you receive a response, Requests makes a guess at the encoding to
use for decoding the response when you access the :attr:`Response.text
<requests.Response.text>` attribute. Requests will first check for an
-encoding in the HTTP header, and if none is present, will use `chardet
-<https://pypi.org/project/chardet/>`_ to attempt to guess the encoding.
+encoding in the HTTP header, and if none is present, will use
+`charset_normalizer <https://pypi.org/project/charset_normalizer/>`_ to attempt
+to guess the encoding.

The only time Requests will not do this is if no explicit charset
is present in the HTTP headers **and** the ``Content-Type``
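The behaviour described in that passage can be seen directly on a `Response`; a short sketch using requests' public attributes (the URL is just a placeholder):

```python
import requests

r = requests.get("https://example.com/")    # placeholder URL
print(r.encoding)            # encoding taken from the Content-Type header, if any
print(r.apparent_encoding)   # the detector's guess (charset_normalizer or chardet)

# If the declared encoding looks wrong, force .text to use the guess instead:
r.encoding = r.apparent_encoding
print(r.text[:80])
```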
18 changes: 9 additions & 9 deletions requests/__init__.py
@@ -41,12 +41,12 @@
"""

import urllib3
-import chardet
+import charset_normalizer
import warnings
from .exceptions import RequestsDependencyWarning


-def check_compatibility(urllib3_version, chardet_version):
+def check_compatibility(urllib3_version, charset_normalizer_version):
    urllib3_version = urllib3_version.split('.')
    assert urllib3_version != ['dev']  # Verify urllib3 isn't installed from git.

@@ -62,11 +62,11 @@ def check_compatibility(urllib3_version, chardet_version):
    assert minor >= 21
    assert minor <= 26

-    # Check chardet for compatibility.
-    major, minor, patch = chardet_version.split('.')[:3]
+    # Check charset_normalizer for compatibility.
+    major, minor, patch = charset_normalizer_version.split('.')[:3]
    major, minor, patch = int(major), int(minor), int(patch)
-    # chardet >= 3.0.2, < 5.0.0
-    assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0)
+    # charset_normalizer >= 1.3.5, < 2.0.0
+    assert (1, 3, 5) <= (major, minor, patch) < (2, 0, 0)


def _check_cryptography(cryptography_version):
@@ -82,10 +82,10 @@ def _check_cryptography(cryptography_version):

# Check imported dependencies for compatibility.
try:
-    check_compatibility(urllib3.__version__, chardet.__version__)
+    check_compatibility(urllib3.__version__, charset_normalizer.__version__)
except (AssertionError, ValueError):
-    warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
-                  "version!".format(urllib3.__version__, chardet.__version__),
+    warnings.warn("urllib3 ({}) or charset_normalizer ({}) doesn't match a supported "
+                  "version!".format(urllib3.__version__, charset_normalizer.__version__),
                  RequestsDependencyWarning)

# Attempt to enable urllib3's fallback for SNI support
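To illustrate the new bounds, a small sketch calling the module-level helper directly (it is not public API; the version strings below are made up for the example):

```python
import requests

# urllib3 within 1.21-1.26 and charset_normalizer within [1.3.5, 2.0.0) pass silently.
requests.check_compatibility("1.26.4", "1.3.6")

# An out-of-range charset_normalizer version trips the assertion, which
# requests/__init__.py turns into a RequestsDependencyWarning at import time.
try:
    requests.check_compatibility("1.26.4", "2.0.0")
except AssertionError:
    print("2.0.0 is outside the supported range")
```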
5 changes: 4 additions & 1 deletion requests/compat.py
@@ -8,7 +8,10 @@
Python 3.
"""

-import chardet
+try:
+    import chardet
+except ImportError:
+    import charset_normalizer as chardet

import sys

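Downstream code that imported the old alias keeps working either way. A minimal sketch, assuming only charset_normalizer is available so the alias resolves to it:

```python
# The name exported by requests.compat is still `chardet`, but with chardet
# absent it is actually charset_normalizer behind the scenes.
from requests.compat import chardet

print(chardet.__name__)                                   # 'chardet' or 'charset_normalizer'
print(chardet.detect("Füße und Straße".encode("utf-8")))  # same detect() dict either way
```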
20 changes: 18 additions & 2 deletions requests/help.py
@@ -8,10 +8,19 @@

import idna
import urllib3
-import chardet

from . import __version__ as requests_version

+try:
+    import charset_normalizer
+except ImportError:
+    charset_normalizer = None
+
+try:
+    import chardet
+except ImportError:
+    chardet = None
+
try:
    from urllib3.contrib import pyopenssl
except ImportError:
@@ -71,7 +80,12 @@ def info():

    implementation_info = _implementation()
    urllib3_info = {'version': urllib3.__version__}
-    chardet_info = {'version': chardet.__version__}
+    charset_normalizer_info = {'version': None}
+    chardet_info = {'version': None}
+    if charset_normalizer:
+        charset_normalizer_info = {'version': charset_normalizer.__version__}
+    if chardet:
+        chardet_info = {'version': chardet.__version__}

    pyopenssl_info = {
        'version': None,
@@ -99,9 +113,11 @@ def info():
        'implementation': implementation_info,
        'system_ssl': system_ssl_info,
        'using_pyopenssl': pyopenssl is not None,
+        'using_charset_normalizer': chardet is None,
        'pyOpenSSL': pyopenssl_info,
        'urllib3': urllib3_info,
        'chardet': chardet_info,
+        'charset_normalizer': charset_normalizer_info,
        'cryptography': cryptography_info,
        'idna': idna_info,
        'requests': {
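To check which detector a given environment actually ended up with, the new keys can be read from `requests.help`; the output naturally depends on what is installed:

```python
from requests.help import info

report = info()
print(report["using_charset_normalizer"])   # True when chardet is not importable
print(report["chardet"])                    # {'version': None} if chardet is absent
print(report["charset_normalizer"])         # {'version': '...'} when installed
```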
6 changes: 3 additions & 3 deletions requests/models.py
@@ -726,7 +726,7 @@ def next(self):

    @property
    def apparent_encoding(self):
-        """The apparent encoding, provided by the chardet library."""
+        """The apparent encoding, provided by the charset_normalizer or chardet libraries."""
        return chardet.detect(self.content)['encoding']

    def iter_content(self, chunk_size=1, decode_unicode=False):
@@ -840,7 +840,7 @@ def text(self):
"""Content of the response, in unicode.
If Response.encoding is None, encoding will be guessed using
``chardet``.
``charset_normalizer`` or ``chardet``.
The encoding of the response content is determined based solely on HTTP
headers, following RFC 2616 to the letter. If you can take advantage of
@@ -888,7 +888,7 @@ def json(self, **kwargs):
        if not self.encoding and self.content and len(self.content) > 3:
            # No encoding set. JSON RFC 4627 section 3 states we should expect
            # UTF-8, -16 or -32. Detect which one to use; If the detection or
-            # decoding fails, fall back to `self.text` (using chardet to make
+            # decoding fails, fall back to `self.text` (using charset_normalizer to make
            # a best guess).
            encoding = guess_json_utf(self.content)
            if encoding is not None:
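The RFC 4627 sniffing referenced in that comment runs before any detector is consulted. A brief sketch, assuming `guess_json_utf` still lives in `requests.utils` as it does in the releases I am aware of:

```python
from requests.utils import guess_json_utf

payload = '{"name": "Straße"}'.encode("utf-16")
print(guess_json_utf(payload))   # 'utf-16' -- no chardet/charset_normalizer involved

# Only when this sniffing (or the subsequent decode) fails does .json()
# fall back to .text, and therefore to the detector.
```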
5 changes: 3 additions & 2 deletions requests/packages.py
@@ -3,12 +3,13 @@
# This code exists for backwards compatibility reasons.
# I don't like it either. Just look the other way. :)

-for package in ('urllib3', 'idna', 'chardet'):
+for package, alias in (('urllib3', 'urllib3'), ('idna', 'idna'), ('charset_normalizer', 'chardet')):
    locals()[package] = __import__(package)
+    locals()[alias] = locals()[package]
    # This traversal is apparently necessary such that the identities are
    # preserved (requests.packages.urllib3.* is urllib3.*)
    for mod in list(sys.modules):
        if mod == package or mod.startswith(package + '.'):
-            sys.modules['requests.packages.' + mod] = sys.modules[mod]
+            sys.modules['requests.packages.' + mod.replace(package, alias)] = sys.modules[mod]

# Kinda cool, though, right?
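The net effect of the aliasing, sketched here under the assumption that this commit's `packages.py` is in place, is that the old compatibility path now resolves to charset_normalizer:

```python
import sys

import charset_normalizer
import requests.packages

# Both the attribute and the sys.modules entry point at charset_normalizer.
print(requests.packages.chardet is charset_normalizer)                  # True
print(sys.modules["requests.packages.chardet"] is charset_normalizer)   # True
```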
3 changes: 2 additions & 1 deletion setup.py
@@ -41,7 +41,7 @@ def run_tests(self):
packages = ['requests']

requires = [
-    'chardet>=3.0.2,<5',
+    'charset_normalizer>=1.3.5,<2',
    'idna>=2.5,<3',
    'urllib3>=1.21.1,<1.27',
    'certifi>=2017.4.17'
@@ -103,6 +103,7 @@ def run_tests(self):
        'security': ['pyOpenSSL >= 0.14', 'cryptography>=1.3.4'],
        'socks': ['PySocks>=1.5.6, !=1.5.7'],
        'socks:sys_platform == "win32" and python_version == "2.7"': ['win_inet_pton'],
+        'lgpl': ['chardet>=3.0.2,<5']
    },
    project_urls={
        'Documentation': 'https://requests.readthedocs.io',
