Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to a more lenient cookie parsing method #900

Merged
merged 3 commits into from Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 31 additions & 9 deletions starlette/requests.py
@@ -1,5 +1,5 @@
import asyncio
import http.cookies
from http import cookies as http_cookies
import json
import typing
from collections.abc import Mapping
Expand All @@ -23,6 +23,33 @@
}


def cookie_parser(cookie_string: str) -> typing.Dict[str, str]:
"""
This function parses a ``Cookie`` HTTP header into a dict of key/value pairs.

It attempts to mimic browser cookie parsing behavior: browsers and web servers
frequently disregard the spec (RFC 6265) when setting and reading cookies,
so we attempt to suit the common scenarios here.

This function has been adapted from Django 3.1.0.
Note: we are explicitly _NOT_ using `SimpleCookie.load` because it is based
on an outdated spec and will fail on lots of input we want to support
"""
cookie_dict: typing.Dict[str, str] = {}
for chunk in cookie_string.split(";"):
if "=" in chunk:
key, val = chunk.split("=", 1)
else:
# Assume an empty name per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
key, val = "", chunk
key, val = key.strip(), val.strip()
if key or val:
# unquote using Python's algorithm.
cookie_dict[key] = http_cookies._unquote(val) # type: ignore
return cookie_dict


class ClientDisconnect(Exception):
pass

Expand Down Expand Up @@ -87,16 +114,11 @@ def path_params(self) -> dict:
@property
def cookies(self) -> typing.Dict[str, str]:
if not hasattr(self, "_cookies"):
cookies = {}
cookies: typing.Dict[str, str] = {}
cookie_header = self.headers.get("cookie")

if cookie_header:
cookie = http.cookies.SimpleCookie() # type: http.cookies.BaseCookie
try:
cookie.load(cookie_header)
except http.cookies.CookieError:
pass
for key, morsel in cookie.items():
cookies[key] = morsel.value
cookies = cookie_parser(cookie_header)
self._cookies = cookies
return self._cookies

Expand Down
103 changes: 100 additions & 3 deletions tests/test_requests.py
Expand Up @@ -285,15 +285,112 @@ async def app(scope, receive, send):
assert response.text == "Hello, cookies!"


def test_invalid_cookie():
def test_cookie_lenient_parsing():
"""
The following test is based on a cookie set by Okta, a well-known authorization service.
It turns out that it's common practice to set cookies that would be invalid according to
the spec.
"""
tough_cookie = (
"provider-oauth-nonce=validAsciiblabla; "
'okta-oauth-redirect-params={"responseType":"code","state":"somestate",'
'"nonce":"somenonce","scopes":["openid","profile","email","phone"],'
'"urls":{"issuer":"https://subdomain.okta.com/oauth2/authServer",'
'"authorizeUrl":"https://subdomain.okta.com/oauth2/authServer/v1/authorize",'
'"userinfoUrl":"https://subdomain.okta.com/oauth2/authServer/v1/userinfo"}}; '
"importantCookie=importantValue; sessionCookie=importantSessionValue"
)
expected_keys = {
"importantCookie",
"okta-oauth-redirect-params",
"provider-oauth-nonce",
"sessionCookie",
}

async def app(scope, receive, send):
request = Request(scope, receive)
response = JSONResponse({"cookies": request.cookies})
await response(scope, receive, send)

client = TestClient(app)
response = client.get("/", headers={"cookie": tough_cookie})
result = response.json()
assert len(result["cookies"]) == 4
assert set(result["cookies"].keys()) == expected_keys


# These test cases copied from Tornado's implementation
@pytest.mark.parametrize(
"set_cookie,expected",
[
("chips=ahoy; vienna=finger", {"chips": "ahoy", "vienna": "finger"}),
# all semicolons are delimiters, even within quotes
(
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
{"keebler": '"E=mc2', "L": '\\"Loves\\"', "fudge": "\\012", "": '"'},
),
# Illegal cookies that have an '=' char in an unquoted value.
("keebler=E=mc2", {"keebler": "E=mc2"}),
# Cookies with ':' character in their name.
("key:term=value:term", {"key:term": "value:term"}),
# Cookies with '[' and ']'.
("a=b; c=[; d=r; f=h", {"a": "b", "c": "[", "d": "r", "f": "h"}),
# Cookies that RFC6265 allows.
("a=b; Domain=example.com", {"a": "b", "Domain": "example.com"}),
# parse_cookie() keeps only the last cookie with the same name.
("a=b; h=i; a=c", {"a": "c", "h": "i"}),
],
)
def test_cookies_edge_cases(set_cookie, expected):
async def app(scope, receive, send):
request = Request(scope, receive)
response = JSONResponse({"cookies": request.cookies})
await response(scope, receive, send)

client = TestClient(app)
response = client.get("/", headers={"cookie": set_cookie})
result = response.json()
assert result["cookies"] == expected


@pytest.mark.parametrize(
"set_cookie,expected",
[
# Chunks without an equals sign appear as unnamed values per
# https://bugzilla.mozilla.org/show_bug.cgi?id=169091
(
"abc=def; unnamed; django_language=en",
{"": "unnamed", "abc": "def", "django_language": "en"},
),
# Even a double quote may be an unamed value.
('a=b; "; c=d', {"a": "b", "": '"', "c": "d"}),
# Spaces in names and values, and an equals sign in values.
("a b c=d e = f; gh=i", {"a b c": "d e = f", "gh": "i"}),
# More characters the spec forbids.
('a b,c<>@:/[]?{}=d " =e,f g', {"a b,c<>@:/[]?{}": 'd " =e,f g'}),
# Unicode characters. The spec only allows ASCII.
# ("saint=André Bessette", {"saint": "André Bessette"}),
# Browsers don't send extra whitespace or semicolons in Cookie headers,
# but cookie_parser() should parse whitespace the same way
# document.cookie parses whitespace.
# (" = b ; ; = ; c = ; ", {"": "b", "c": ""}),
],
)
def test_cookies_invalid(set_cookie, expected):
"""
Cookie strings that are against the RFC6265 spec but which browsers will send if set
via document.cookie.
"""

async def app(scope, receive, send):
request = Request(scope, receive)
response = JSONResponse({"cookies": request.cookies})
await response(scope, receive, send)

client = TestClient(app)
response = client.get("/", cookies={"invalid/cookie": "test", "valid": "test2"})
assert response.json() == {"cookies": {}}
response = client.get("/", headers={"cookie": set_cookie})
result = response.json()
assert result["cookies"] == expected


def test_chunked_encoding():
Expand Down