Skip to content

Commit

Permalink
Make chardet/charset_normalizer an optional dependency
Browse files (browse the repository at this point in the history)
  • Loading branch information
akx committed Jul 15, 2021
1 parent 0712b82 commit cf3fbf0
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
9 changes: 6 additions & 3 deletions requests/compat.py
Expand Up @@ -11,10 +11,13 @@
# Character-encoding detection is optional: prefer `chardet`, fall back to
# `charset_normalizer`, and leave `chardet` as None when neither is installed
# so callers can check for availability before calling `chardet.detect`.
try:
    import chardet
except ImportError:
    try:
        import charset_normalizer as chardet
    except ImportError:
        chardet = None
    else:
        import warnings

        # Suppress charset_normalizer warnings whose message starts with
        # 'Trying to detect'.
        warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer')


import sys
Expand Down
22 changes: 20 additions & 2 deletions requests/models.py
Expand Up @@ -732,7 +732,17 @@ def next(self):
@property
def apparent_encoding(self):
    """The apparent encoding of the response content.

    Uses the chardet or charset_normalizer library when one of them could
    be imported. Otherwise falls back to checking whether the content
    decodes strictly as ASCII, then as UTF-8.

    :raises ContentDecodingError: if no detection library is available and
        the content is neither valid ASCII nor valid UTF-8.
    """
    # If chardet/charset_normalizer is available, use it.
    if chardet is not None:
        return chardet.detect(self.content)['encoding']
    # Fall back to trying simpler, dumber means. ASCII is tried first so
    # that it is reported in preference to UTF-8 when both would decode.
    for encoding in ("ascii", "utf-8"):
        try:
            self.content.decode(encoding, "strict")
        except UnicodeDecodeError:
            continue
        return encoding
    raise ContentDecodingError("Unable to detect response encoding")

def iter_content(self, chunk_size=1, decode_unicode=False):
"""Iterates over the response data. When stream=True is set on the
Expand Down Expand Up @@ -862,7 +872,15 @@ def text(self):

# Fallback to auto-detected encoding.
if self.encoding is None:
encoding = self.apparent_encoding
try:
encoding = self.apparent_encoding
except ContentDecodingError:
raise ContentDecodingError(
"Unable to automatically detect the response's encoding. "
"If you know the response's encoding, you can set it manually (`.encoding`), or "
"install either the `chardet` or `charset_normalizer` library to make automatic "
"detection smarter."
)

# Decode unicode from given encoding.
try:
Expand Down
26 changes: 26 additions & 0 deletions tests/test_testserver.py
Expand Up @@ -54,8 +54,34 @@ def test_text_response(self):

assert r.status_code == 200
assert r.text == u'roflol'
assert not r.encoding
assert r.apparent_encoding == 'ascii'
assert r.headers['Content-Length'] == '6'

def test_text_response_utf_8(self, mocker):
    """
    `.apparent_encoding` infers UTF-8 when no detection library is present
    """
    # Simulate an environment without chardet/charset_normalizer.
    mocker.patch('requests.models.chardet', new=None)

    expected_text = u"Törkylempijävongahdus"
    expected_length = len(expected_text.encode("utf-8"))
    raw_response = (
        u"HTTP/1.1 200 OK\r\n"
        "Content-Length: {}\r\n"
        "\r\n"
        "{}"
    ).format(expected_length, expected_text)

    # `text_response_server` takes care of encoding to UTF-8 internally
    with Server.text_response_server(raw_response) as (host, port):
        url = 'http://{}:{}'.format(host, port)
        r = requests.get(url)

    assert r.status_code == 200
    assert r.text == expected_text
    assert not r.encoding
    assert r.apparent_encoding == 'utf-8'
    assert r.headers['Content-Length'] == str(expected_length)

def test_basic_response(self):
"""the basic response server returns an empty http response"""
with Server.basic_response_server() as (host, port):
Expand Down

0 comments on commit cf3fbf0

Please sign in to comment.