Skip to content

Commit

Permalink
Handle multiple Content-Encodings (urllib3#1442)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbramleycl authored and sethmlarson committed Sep 17, 2018
1 parent a345e51 commit e387612
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Expand Up @@ -4,6 +4,8 @@ Changes
dev (master)
------------

* Add ability to handle multiple Content-Encodings (Issue #1441 and Pull #1442)

* Skip DNS names that can't be idna-decoded when using pyOpenSSL (Issue #1405).

* Add a server_hostname parameter to HTTPSConnection which allows for
Expand Down
3 changes: 3 additions & 0 deletions CONTRIBUTORS.txt
Expand Up @@ -266,5 +266,8 @@ In chronological order:
* Hugo van Kemenade <https://github.com/hugovk>
* Drop support for EOL Python 2.6

* Justin Bramley <https://github.com/jbramleycl>
* Add ability to handle multiple Content-Encodings

* [Your name or handle] <[email or website]>
* [Brief summary of your changes]
33 changes: 31 additions & 2 deletions src/urllib3/response.py
Expand Up @@ -90,7 +90,31 @@ def decompress(self, data):
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)


class MultiDecoder(object):
    """
    Decoder for a body that had several content codings applied in sequence.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Because the header lists codings in the order they were applied,
    decoding walks the list in reverse: the last listed coding is removed
    first and the first listed coding is removed last.
    """

    def __init__(self, modes):
        """
        :param modes:
            Comma-separated content-coding names (for example
            ``'deflate, gzip'``). Each name is stripped of surrounding
            whitespace and handed to ``_get_decoder``.
        """
        # Kept in header order; iterate with reversed() when decoding.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(',')]

    def flush(self):
        """
        Flush every decoder in the chain and return the remaining output.

        Decoders are flushed in decoding order. Whatever an earlier stage
        releases on flush is still encoded with the remaining codings, so
        it is pushed through each later stage before that stage is flushed
        itself. If no earlier stage has anything buffered this reduces to
        flushing the final decoder only.
        """
        pending = b''
        for decoder in reversed(self._decoders):
            if pending:
                # Output flushed by the previous stage is input for this one.
                pending = decoder.decompress(pending)
            pending += decoder.flush()
        return pending

    def decompress(self, data):
        """
        Run ``data`` through each decoder, last-applied coding first.

        :param data: Encoded bytes as received.
        :returns: The output of the final decoder in the chain.
        """
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data


def _get_decoder(mode):
if ',' in mode:
return MultiDecoder(mode)

if mode == 'gzip':
return GzipDecoder()

Expand Down Expand Up @@ -283,8 +307,13 @@ def _init_decoder(self):
# Note: content-encoding value should be case-insensitive, per RFC 7230
# Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
self._decoder = _get_decoder(content_encoding)
if self._decoder is None:
if content_encoding in self.CONTENT_DECODERS:
self._decoder = _get_decoder(content_encoding)
elif ',' in content_encoding:
encodings = [e.strip() for e in content_encoding.split(',') if e.strip() in self.CONTENT_DECODERS]
if len(encodings):
self._decoder = _get_decoder(content_encoding)

def _decode(self, data, decode_content, flush_decoder):
"""
Expand Down
32 changes: 32 additions & 0 deletions test/test_response.py
Expand Up @@ -208,6 +208,38 @@ def test_chunked_decoding_gzip_swallow_garbage(self):

assert r.data == b'foofoofoo'

def test_multi_decoding_deflate_deflate(self):
    """A body deflated twice is fully decoded for 'deflate, deflate'."""
    payload = zlib.compress(zlib.compress(b'foo'))
    response = HTTPResponse(
        BytesIO(payload),
        headers={'content-encoding': 'deflate, deflate'},
    )

    assert response.data == b'foo'

def test_multi_decoding_deflate_gzip(self):
    """A body deflated and then gzipped is decoded for 'deflate, gzip'."""
    # wbits of 16 + MAX_WBITS makes zlib emit a gzip container.
    gzipper = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
    deflated = zlib.compress(b'foo')
    payload = gzipper.compress(deflated) + gzipper.flush()

    response = HTTPResponse(
        BytesIO(payload),
        headers={'content-encoding': 'deflate, gzip'},
    )

    assert response.data == b'foo'

def test_multi_decoding_gzip_gzip(self):
    """A body gzipped twice is fully decoded for 'gzip, gzip'."""
    payload = b'foo'
    # Apply two gzip layers, each with a fresh compressor object.
    for _ in range(2):
        gzipper = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
        payload = gzipper.compress(payload) + gzipper.flush()

    response = HTTPResponse(
        BytesIO(payload),
        headers={'content-encoding': 'gzip, gzip'},
    )

    assert response.data == b'foo'

def test_body_blob(self):
resp = HTTPResponse(b'foo')
assert resp.data == b'foo'
Expand Down

0 comments on commit e387612

Please sign in to comment.