diff --git a/requests/compat.py b/requests/compat.py
index ee28551b8f..882e7abbd4 100644
--- a/requests/compat.py
+++ b/requests/compat.py
@@ -11,10 +11,13 @@
 try:
     import chardet
 except ImportError:
-    import charset_normalizer as chardet
-    import warnings
+    try:
+        import charset_normalizer as chardet
+        import warnings
 
-    warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer')
+        warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer')
+    except ImportError:
+        chardet = None
 
 import sys
 
diff --git a/requests/models.py b/requests/models.py
index aa6fb86e4e..aaf26fdcf2 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -732,7 +732,17 @@ def next(self):
     @property
     def apparent_encoding(self):
         """The apparent encoding, provided by the charset_normalizer or chardet libraries."""
-        return chardet.detect(self.content)['encoding']
+        # If chardet/charset_normalizer is available, use it.
+        if chardet:
+            return chardet.detect(self.content)['encoding']
+        # Fall back to trying simpler, dumber means.
+        for encoding in ("ascii", "utf-8"):
+            try:
+                self.content.decode(encoding, "strict")
+                return encoding
+            except UnicodeDecodeError:
+                pass
+        raise ContentDecodingError("Unable to detect response encoding")
 
     def iter_content(self, chunk_size=1, decode_unicode=False):
         """Iterates over the response data.  When stream=True is set on the
@@ -862,7 +872,15 @@ def text(self):
 
         # Fallback to auto-detected encoding.
         if self.encoding is None:
-            encoding = self.apparent_encoding
+            try:
+                encoding = self.apparent_encoding
+            except ContentDecodingError:
+                raise ContentDecodingError(
+                    "Unable to automatically detect the response's encoding. "
+                    "If you know the response's encoding, you can set it manually (`.encoding`), or "
+                    "install either the `chardet` or `charset_normalizer` library to make automatic "
+                    "detection smarter."
+                )
 
         # Decode unicode from given encoding.
         try:
diff --git a/tests/test_testserver.py b/tests/test_testserver.py
index aac529261b..cd60a44d6e 100644
--- a/tests/test_testserver.py
+++ b/tests/test_testserver.py
@@ -54,8 +54,34 @@ def test_text_response(self):
 
             assert r.status_code == 200
             assert r.text == u'roflol'
+            assert not r.encoding
+            assert r.apparent_encoding == 'ascii'
             assert r.headers['Content-Length'] == '6'
 
+    def test_text_response_utf_8(self, mocker):
+        """
+        test `.apparent_encoding` is able to infer UTF-8
+        """
+        mocker.patch('requests.models.chardet', new=None)
+        response_unicode = u"Törkylempijävongahdus"
+        response_length = len(response_unicode.encode("utf-8"))
+        # `text_response_server` takes care of encoding to UTF-8 internally
+        server = Server.text_response_server((
+            u"HTTP/1.1 200 OK\r\n"
+            "Content-Length: {}\r\n"
+            "\r\n"
+            "{}"
+        ).format(response_length, response_unicode))
+
+        with server as (host, port):
+            r = requests.get('http://{}:{}'.format(host, port))
+
+            assert r.status_code == 200
+            assert r.text == response_unicode
+            assert not r.encoding
+            assert r.apparent_encoding == 'utf-8'
+            assert r.headers['Content-Length'] == str(response_length)
+
     def test_basic_response(self):
         """the basic response server returns an empty http response"""
         with Server.basic_response_server() as (host, port):