Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wsgi: Work around CPython bug when parsing non-ASCII headers #574

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 25 additions & 5 deletions eventlet/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,10 +674,13 @@ def get_environ(self):

ct = self.headers.get('content-type')
if ct is None:
ct_was_none = True
Comment on lines 676 to +677
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if ct is None:
ct_was_none = True
ct_was_none = ct is None
if ct_was_none:

try:
ct = self.headers.type
except AttributeError:
ct = self.headers.get_content_type()
else:
ct_was_none = False
Comment on lines +682 to +683
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
else:
ct_was_none = False

env['CONTENT_TYPE'] = ct

length = self.headers.get('content-length')
Expand All @@ -694,16 +697,33 @@ def get_environ(self):
env['REMOTE_PORT'] = str(client_addr[1])
env['GATEWAY_INTERFACE'] = 'CGI/1.1'

try:
if six.PY2:
headers = self.headers.headers
except AttributeError:
headers = self.headers._headers
else:
headers = [h.split(':', 1) for h in headers]
else:
headers = self.headers._headers
payload = self.headers.get_payload()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh jeez -- apparently `get_payload()` may return a str or a list of one or more messages (if Content-Type is message/rfc822).

😞

if payload:
# There shouldn't be a message associated with the headers;
# must've bumped up against https://bugs.python.org/issue37093
for line in payload.rstrip('\r\n').split('\n'):
if ':' not in line or line[:1] in ' \t':
# Well, we're no more broken than we were before...
# Should we support line folding? Should we 400 a bad header line?
break
header, value = line.split(':', 1)
headers.append((header, value))
if ct_was_none and header.lower() == 'content-type':
env['CONTENT_TYPE'] = value.strip(' \t\n\r')
elif length is None and header.lower() == 'content-length':
length = env['CONTENT_LENGTH'] = value.strip(' \t\n\r')

env['headers_raw'] = headers_raw = tuple((k, v.strip(' \t\n\r')) for k, v in headers)
for k, v in headers_raw:
k = k.replace('-', '_').upper()
if six.PY2:
k = k.replace('-', '_').upper()
else:
k = k.replace('-', '_').encode('latin1').upper().decode('latin1')
if k in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
# These do not get the HTTP_ prefix and were handled above
continue
Expand Down
53 changes: 53 additions & 0 deletions tests/wsgi_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,59 @@ def wsgi_app(environ, start_response):
assert isinstance(g[1], str), msg
assert g[1] == '/\xbd\xa5\xe5\xa0\xbd\xe4'

def test_headers_latin1(self):
    # Two requests share one connection: a Content-Length PUT, then a
    # chunked PUT that closes the connection.  Each carries a UTF-8 header
    # value and a header whose *name* contains UTF-8 bytes; both must show
    # up in the WSGI environ as native strings.
    captured = []

    def wsgi_app(environ, start_response):
        # Remember the environ for inspection and echo the body back.
        captured.append(environ)
        start_response("200 OK", [])
        return [environ['wsgi.input'].read()]

    def check_non_ascii_headers(environ):
        assert 'HTTP_SNOW_MAN' in environ
        # WSGI demands native strings, either as bytes or decoded-Latin-1
        assert isinstance(environ['HTTP_SNOW_MAN'], str)
        assert environ['HTTP_SNOW_MAN'] == '\xE2\x98\x83'

        http_keys = [h for h in environ if h.startswith('HTTP_')]
        assert 'HTTP_UTF_8_\xF0\x9F\x8C\xB4' in http_keys
        assert isinstance(environ['HTTP_UTF_8_\xF0\x9F\x8C\xB4'], str)
        assert environ['HTTP_UTF_8_\xF0\x9F\x8C\xB4'] == 'palm tree'

    self.site.application = wsgi_app
    sock = eventlet.connect(self.server_addr)

    # First request: fixed-length body, connection stays open.
    first_request = (
        b'PUT / HTTP/1.1\r\n'
        b'Snow-Man: ' + u'\N{SNOWMAN}'.encode('utf8') + b'\r\n'
        b'Utf-8-' + u'\U0001F334'.encode('utf-8') + b': palm tree\r\n'
        b'Content-Length: 4\r\n'
        b'\r\ndata')
    sock.sendall(first_request)
    result = read_http(sock)
    assert result.status == 'HTTP/1.1 200 OK'
    assert result.body == b'data'
    check_non_ascii_headers(captured[0])

    # Second request: chunked body with an explicit Content-Type that
    # follows the non-ASCII headers.
    second_request = (
        b'PUT / HTTP/1.1\r\n'
        b'Connection: close\r\n'
        b'Snow-Man: ' + u'\N{SNOWMAN}'.encode('utf8') + b'\r\n'
        b'Utf-8-' + u'\U0001F334'.encode('utf-8') + b': palm tree\r\n'
        b'Content-Type: foo/bar\r\n'
        b'Transfer-Encoding: chunked\r\n'
        b'\r\n'
        b'e\r\n'
        b'Hello, world!\n\r\n'
        b'0\r\n\r\n')
    sock.sendall(second_request)
    result = read_http(sock)
    assert result.status == 'HTTP/1.1 200 OK'
    assert result.body == b'Hello, world!\n'
    check_non_ascii_headers(captured[1])

    assert captured[1]['CONTENT_TYPE'] == 'foo/bar'


@tests.skip_if_no_ipv6
def test_ipv6(self):
try:
Expand Down