Make chardet/charset_normalizer an optional dependency

See psf/requests#5871
akx · Jul 15, 2021 · cf3fbf0 · cf3fbf0
1 parent 0712b82
commit cf3fbf0
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 5 deletions.
diff --git a/requests/compat.py b/requests/compat.py
@@ -11,10 +11,13 @@
 try:
     import chardet
 except ImportError:
-    import charset_normalizer as chardet
-    import warnings
+    try:
+        import charset_normalizer as chardet
+        import warnings
 
-    warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer')
+        warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer')
+    except ImportError:
+        chardet = None
 
 
 import sys

diff --git a/requests/models.py b/requests/models.py
@@ -732,7 +732,17 @@ def next(self):
     @property
     def apparent_encoding(self):
         """The apparent encoding, provided by the charset_normalizer or chardet libraries."""
-        return chardet.detect(self.content)['encoding']
+        # If chardet/charset_normalizer is available, use it.
+        if chardet:
+            return chardet.detect(self.content)['encoding']
+        # Otherwise, fall back to a crude check: try strict decoding as ASCII, then UTF-8.
+        for encoding in ("ascii", "utf-8"):
+            try:
+                self.content.decode(encoding, "strict")
+                return encoding
+            except UnicodeDecodeError:
+                pass
+        raise ContentDecodingError("Unable to detect response encoding")
 
     def iter_content(self, chunk_size=1, decode_unicode=False):
         """Iterates over the response data.  When stream=True is set on the
@@ -862,7 +872,15 @@ def text(self):
 
         # Fallback to auto-detected encoding.
         if self.encoding is None:
-            encoding = self.apparent_encoding
+            try:
+                encoding = self.apparent_encoding
+            except ContentDecodingError:
+                raise ContentDecodingError(
+                    "Unable to automatically detect the response's encoding. "
+                    "If you know the response's encoding, you can set it manually (`.encoding`), or "
+                    "install either the `chardet` or `charset_normalizer` library to make automatic "
+                    "detection smarter."
+                )
 
         # Decode unicode from given encoding.
         try:

diff --git a/tests/test_testserver.py b/tests/test_testserver.py
@@ -54,8 +54,34 @@ def test_text_response(self):
 
             assert r.status_code == 200
             assert r.text == u'roflol'
+            assert not r.encoding
+            assert r.apparent_encoding == 'ascii'
             assert r.headers['Content-Length'] == '6'
 
+    def test_text_response_utf_8(self, mocker):
+        """
+        Test that `.apparent_encoding` can infer UTF-8 when no detection library is available.
+        """
+        mocker.patch('requests.models.chardet', new=None)
+        response_unicode = u"Törkylempijävongahdus"
+        response_length = len(response_unicode.encode("utf-8"))
+        # `text_response_server` takes care of encoding to UTF-8 internally
+        server = Server.text_response_server((
+            u"HTTP/1.1 200 OK\r\n"
+            "Content-Length: {}\r\n"
+            "\r\n"
+            "{}"
+        ).format(response_length, response_unicode))
+
+        with server as (host, port):
+            r = requests.get('http://{}:{}'.format(host, port))
+
+            assert r.status_code == 200
+            assert r.text == response_unicode
+            assert not r.encoding
+            assert r.apparent_encoding == 'utf-8'
+            assert r.headers['Content-Length'] == str(response_length)
+
     def test_basic_response(self):
         """the basic response server returns an empty http response"""
         with Server.basic_response_server() as (host, port):