Handle multiple Content-Encodings (urllib3#1442)

Dobatymo · Sep 17, 2018 · e387612
1 parent a345e51
commit e387612
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 2 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -4,6 +4,8 @@ Changes
 dev (master)
 ------------
 
+* Add ability to handle multiple Content-Encodings (Issue #1441 and Pull #1442)
+
 * Skip DNS names that can't be idna-decoded when using pyOpenSSL (Issue #1405).
 
 * Add a server_hostname parameter to HTTPSConnection which allows for

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
@@ -266,5 +266,8 @@ In chronological order:
 * Hugo van Kemenade <https://github.com/hugovk>
   * Drop support for EOL Python 2.6
 
+* Justin Bramley <https://github.com/jbramleycl>
+  * Add ability to handle multiple Content-Encodings
+
 * [Your name or handle] <[email or website]>
   * [Brief summary of your changes]
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
@@ -90,7 +90,31 @@ def decompress(self, data):
             self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
 
 
+class MultiDecoder(object):
+    """
+    From RFC7231:
+        If one or more encodings have been applied to a representation, the
+        sender that applied the encodings MUST generate a Content-Encoding
+        header field that lists the content codings in the order in which
+        they were applied.
+    """
+
+    def __init__(self, modes):
+        self._decoders = [_get_decoder(m.strip()) for m in modes.split(',')]
+
+    def flush(self):
+        return self._decoders[0].flush()
+
+    def decompress(self, data):
+        for d in reversed(self._decoders):
+            data = d.decompress(data)
+        return data
+
+
 def _get_decoder(mode):
+    if ',' in mode:
+        return MultiDecoder(mode)
+
     if mode == 'gzip':
         return GzipDecoder()
 
@@ -283,8 +307,13 @@ def _init_decoder(self):
         # Note: content-encoding value should be case-insensitive, per RFC 7230
         # Section 3.2
         content_encoding = self.headers.get('content-encoding', '').lower()
-        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
-            self._decoder = _get_decoder(content_encoding)
+        if self._decoder is None:
+            if content_encoding in self.CONTENT_DECODERS:
+                self._decoder = _get_decoder(content_encoding)
+            elif ',' in content_encoding:
+                encodings = [e.strip() for e in content_encoding.split(',') if e.strip() in self.CONTENT_DECODERS]
+                if len(encodings):
+                    self._decoder = _get_decoder(content_encoding)
 
     def _decode(self, data, decode_content, flush_decoder):
         """

diff --git a/test/test_response.py b/test/test_response.py
@@ -208,6 +208,38 @@ def test_chunked_decoding_gzip_swallow_garbage(self):
 
         assert r.data == b'foofoofoo'
 
+    def test_multi_decoding_deflate_deflate(self):
+        data = zlib.compress(zlib.compress(b'foo'))
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'deflate, deflate'})
+
+        assert r.data == b'foo'
+
+    def test_multi_decoding_deflate_gzip(self):
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(zlib.compress(b'foo'))
+        data += compress.flush()
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'deflate, gzip'})
+
+        assert r.data == b'foo'
+
+    def test_multi_decoding_gzip_gzip(self):
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(b'foo')
+        data += compress.flush()
+
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(data)
+        data += compress.flush()
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'gzip, gzip'})
+
+        assert r.data == b'foo'
+
     def test_body_blob(self):
         resp = HTTPResponse(b'foo')
         assert resp.data == b'foo'