diff --git a/CHANGES.rst b/CHANGES.rst
index 3d19f8ec13..5f2f65559c 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,8 @@ Changes
 dev (master)
 ------------
 
+* Add ability to handle multiple Content-Encodings (Issue #1441 and Pull #1442)
+
 * Skip DNS names that can't be idna-decoded when using pyOpenSSL (Issue #1405).
 
 * Add a server_hostname parameter to HTTPSConnection which allows for
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 2413592138..7d868a8f7d 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -266,5 +266,8 @@ In chronological order:
 * Hugo van Kemenade
   * Drop support for EOL Python 2.6
 
+* Justin Bramley
+  * Add ability to handle multiple Content-Encodings
+
 * [Your name or handle] <[email or website]>
   * [Brief summary of your changes]
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index 606238abd4..f0cfbb5499 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -90,7 +90,42 @@ def decompress(self, data):
             self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
 
 
+class MultiDecoder(object):
+    """
+    Decoder for a body that had several comma-separated codings applied.
+
+    From RFC7231:
+        If one or more encodings have been applied to a representation, the
+        sender that applied the encodings MUST generate a Content-Encoding
+        header field that lists the content codings in the order in which
+        they were applied.
+
+    The codings are therefore undone in the reverse of the listed order.
+    """
+
+    def __init__(self, modes):
+        self._decoders = [_get_decoder(m.strip()) for m in modes.split(',')]
+
+    def flush(self):
+        # Drain every stage in decode order, feeding whatever one stage
+        # releases into the next, so buffered output is never dropped.
+        data = b''
+        for d in reversed(self._decoders):
+            if data:
+                data = d.decompress(data)
+            data += d.flush()
+        return data
+
+    def decompress(self, data):
+        for d in reversed(self._decoders):
+            data = d.decompress(data)
+        return data
+
+
 def _get_decoder(mode):
+    if ',' in mode:
+        return MultiDecoder(mode)
+
     if mode == 'gzip':
         return GzipDecoder()
 
@@ -283,8 +318,13 @@ def _init_decoder(self):
         # Note: content-encoding value should be case-insensitive, per RFC 7230
         # Section 3.2
         content_encoding = self.headers.get('content-encoding', '').lower()
-        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
-            self._decoder = _get_decoder(content_encoding)
+        if self._decoder is None:
+            if content_encoding in self.CONTENT_DECODERS:
+                self._decoder = _get_decoder(content_encoding)
+            elif ',' in content_encoding:
+                encodings = [e.strip() for e in content_encoding.split(',') if e.strip() in self.CONTENT_DECODERS]
+                if len(encodings):
+                    self._decoder = _get_decoder(content_encoding)
 
     def _decode(self, data, decode_content, flush_decoder):
         """
diff --git a/test/test_response.py b/test/test_response.py
index 79a879003e..ae70dff900 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -208,6 +208,38 @@ def test_chunked_decoding_gzip_swallow_garbage(self):
 
         assert r.data == b'foofoofoo'
 
+    def test_multi_decoding_deflate_deflate(self):
+        data = zlib.compress(zlib.compress(b'foo'))
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'deflate, deflate'})
+
+        assert r.data == b'foo'
+
+    def test_multi_decoding_deflate_gzip(self):
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(zlib.compress(b'foo'))
+        data += compress.flush()
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'deflate, gzip'})
+
+        assert r.data == b'foo'
+
+    def test_multi_decoding_gzip_gzip(self):
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(b'foo')
+        data += compress.flush()
+
+        compress = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
+        data = compress.compress(data)
+        data += compress.flush()
+
+        fp = BytesIO(data)
+        r = HTTPResponse(fp, headers={'content-encoding': 'gzip, gzip'})
+
+        assert r.data == b'foo'
+
     def test_body_blob(self):
         resp = HTTPResponse(b'foo')
         assert resp.data == b'foo'