Skip to content

Commit

Permalink
[WIP] Refactor cookie handling in CookiesMiddleware and add test case…
Browse files Browse the repository at this point in the history
… for off-domain jar storage
  • Loading branch information
Emmanuel Rondan committed Jun 9, 2023
1 parent 4dacad0 commit 80aaa7c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
11 changes: 4 additions & 7 deletions scrapy/downloadermiddlewares/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,11 @@ def _process_cookies(self, cookies, *, jar, request):
if cookie_domain.startswith("."):
cookie_domain = cookie_domain[1:]

request_domain = urlparse_cached(request).hostname.lower()

if cookie_domain and _is_public_domain(cookie_domain):
if cookie_domain != request_domain:
continue
cookie.domain = request_domain
if _is_public_domain(cookie_domain):
print(f'cookie domain {cookie_domain} is public, skipping')
continue

jar.set_cookie_if_ok(cookie, request)
jar.set_cookie(cookie)

def process_request(self, request, spider):
if request.meta.get("dont_merge_cookies", False):
Expand Down
18 changes: 18 additions & 0 deletions tests/test_downloadermiddleware_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,3 +732,21 @@ def test_server_set_cookie_domain_public_period(self):
"co.uk",
cookies=True,
)

def test_off_domain_jar_storage(self):
    """Check a cookie declared for another domain is kept for that domain.

    A request to ``a.example`` carries a user-supplied cookie whose domain is
    ``b.example``. The test expects that request to leave without a
    ``Cookie`` header, and a later request to ``b.example`` to be sent with
    ``foo=bar``.
    """
    off_domain_cookie = {
        "name": "foo",
        "value": "bar",
        "domain": "b.example",
    }

    # First request: the cookie's domain does not match the request host,
    # so no Cookie header is expected on it.
    origin_request = Request("https://a.example", cookies=[off_domain_cookie])
    assert self.mw.process_request(origin_request, self.spider) is None
    self.assertNotIn(b"Cookie", origin_request.headers)

    # Second request: targets the cookie's own domain, so the cookie
    # supplied with the first request is expected to be attached here.
    target_request = Request("https://b.example/")
    assert self.mw.process_request(target_request, self.spider) is None
    self.assertEqual(target_request.headers.get(b"Cookie"), b"foo=bar")

0 comments on commit 80aaa7c

Please sign in to comment.