diff --git a/starlette/requests.py b/starlette/requests.py index 0ebeb3f4f..b6e62ff18 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -1,5 +1,5 @@ import asyncio -import http.cookies +from http import cookies as http_cookies import json import typing from collections.abc import Mapping @@ -23,6 +23,33 @@ } +def cookie_parser(cookie_string: str) -> typing.Dict[str, str]: + """ + This function parses a ``Cookie`` HTTP header into a dict of key/value pairs. + + It attempts to mimic browser cookie parsing behavior: browsers and web servers + frequently disregard the spec (RFC 6265) when setting and reading cookies, + so we attempt to suit the common scenarios here. + + This function has been adapted from Django 3.1.0. + Note: we are explicitly _NOT_ using `SimpleCookie.load` because it is based + on an outdated spec and will fail on lots of input we want to support + """ + cookie_dict: typing.Dict[str, str] = {} + for chunk in cookie_string.split(";"): + if "=" in chunk: + key, val = chunk.split("=", 1) + else: + # Assume an empty name per + # https://bugzilla.mozilla.org/show_bug.cgi?id=169091 + key, val = "", chunk + key, val = key.strip(), val.strip() + if key or val: + # unquote using Python's algorithm. + cookie_dict[key] = http_cookies._unquote(val) # type: ignore + return cookie_dict + + class ClientDisconnect(Exception): pass @@ -87,16 +114,11 @@ def path_params(self) -> dict: @property def cookies(self) -> typing.Dict[str, str]: if not hasattr(self, "_cookies"): - cookies = {} + cookies: typing.Dict[str, str] = {} cookie_header = self.headers.get("cookie") + if cookie_header: - cookie = http.cookies.SimpleCookie() # type: http.cookies.BaseCookie - try: - cookie.load(cookie_header) - except http.cookies.CookieError: - pass - for key, morsel in cookie.items(): - cookies[key] = morsel.value + cookies = cookie_parser(cookie_header) self._cookies = cookies return self._cookies diff --git a/tests/test_requests.py b/tests/test_requests.py index 3381249e2..c5e50edbe 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -285,15 +285,112 @@ async def app(scope, receive, send): assert response.text == "Hello, cookies!" -def test_invalid_cookie(): +def test_cookie_lenient_parsing(): + """ + The following test is based on a cookie set by Okta, a well-known authorization service. + It turns out that it's common practice to set cookies that would be invalid according to + the spec. + """ + tough_cookie = ( + "provider-oauth-nonce=validAsciiblabla; " + 'okta-oauth-redirect-params={"responseType":"code","state":"somestate",' + '"nonce":"somenonce","scopes":["openid","profile","email","phone"],' + '"urls":{"issuer":"https://subdomain.okta.com/oauth2/authServer",' + '"authorizeUrl":"https://subdomain.okta.com/oauth2/authServer/v1/authorize",' + '"userinfoUrl":"https://subdomain.okta.com/oauth2/authServer/v1/userinfo"}}; ' + "importantCookie=importantValue; sessionCookie=importantSessionValue" + ) + expected_keys = { + "importantCookie", + "okta-oauth-redirect-params", + "provider-oauth-nonce", + "sessionCookie", + } + + async def app(scope, receive, send): + request = Request(scope, receive) + response = JSONResponse({"cookies": request.cookies}) + await response(scope, receive, send) + + client = TestClient(app) + response = client.get("/", headers={"cookie": tough_cookie}) + result = response.json() + assert len(result["cookies"]) == 4 + assert set(result["cookies"].keys()) == expected_keys + + +# These test cases copied from Tornado's implementation +@pytest.mark.parametrize( + "set_cookie,expected", + [ + ("chips=ahoy; vienna=finger", {"chips": "ahoy", "vienna": "finger"}), + # all semicolons are delimiters, even within quotes + ( + 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', + {"keebler": '"E=mc2', "L": '\\"Loves\\"', "fudge": "\\012", "": '"'}, + ), + # Illegal cookies that have an '=' char in an unquoted value. + ("keebler=E=mc2", {"keebler": "E=mc2"}), + # Cookies with ':' character in their name. + ("key:term=value:term", {"key:term": "value:term"}), + # Cookies with '[' and ']'. + ("a=b; c=[; d=r; f=h", {"a": "b", "c": "[", "d": "r", "f": "h"}), + # Cookies that RFC6265 allows. + ("a=b; Domain=example.com", {"a": "b", "Domain": "example.com"}), + # parse_cookie() keeps only the last cookie with the same name. + ("a=b; h=i; a=c", {"a": "c", "h": "i"}), + ], +) +def test_cookies_edge_cases(set_cookie, expected): + async def app(scope, receive, send): + request = Request(scope, receive) + response = JSONResponse({"cookies": request.cookies}) + await response(scope, receive, send) + + client = TestClient(app) + response = client.get("/", headers={"cookie": set_cookie}) + result = response.json() + assert result["cookies"] == expected + + +@pytest.mark.parametrize( + "set_cookie,expected", + [ + # Chunks without an equals sign appear as unnamed values per + # https://bugzilla.mozilla.org/show_bug.cgi?id=169091 + ( + "abc=def; unnamed; django_language=en", + {"": "unnamed", "abc": "def", "django_language": "en"}, + ), + # Even a double quote may be an unamed value. + ('a=b; "; c=d', {"a": "b", "": '"', "c": "d"}), + # Spaces in names and values, and an equals sign in values. + ("a b c=d e = f; gh=i", {"a b c": "d e = f", "gh": "i"}), + # More characters the spec forbids. + ('a b,c<>@:/[]?{}=d " =e,f g', {"a b,c<>@:/[]?{}": 'd " =e,f g'}), + # Unicode characters. The spec only allows ASCII. + # ("saint=André Bessette", {"saint": "André Bessette"}), + # Browsers don't send extra whitespace or semicolons in Cookie headers, + # but cookie_parser() should parse whitespace the same way + # document.cookie parses whitespace. + # (" = b ; ; = ; c = ; ", {"": "b", "c": ""}), + ], +) +def test_cookies_invalid(set_cookie, expected): + """ + Cookie strings that are against the RFC6265 spec but which browsers will send if set + via document.cookie. + """ + async def app(scope, receive, send): request = Request(scope, receive) response = JSONResponse({"cookies": request.cookies}) await response(scope, receive, send) client = TestClient(app) - response = client.get("/", cookies={"invalid/cookie": "test", "valid": "test2"}) - assert response.json() == {"cookies": {}} + response = client.get("/", headers={"cookie": set_cookie}) + result = response.json() + assert result["cookies"] == expected def test_chunked_encoding():