From 75d9915b218be158b2de3dfc2e8b880aba62270d Mon Sep 17 00:00:00 2001 From: Sergey Shepelev Date: Thu, 19 Dec 2019 14:02:28 +0300 Subject: [PATCH] IMPORTANT cache invalidation change, fix 307 keep method, add 308 redirect - support 308 permanent redirect with same method - Http() new attribute, `safe_methods` is public API, type is anything with `contains` operator, tuple is recommended. Defaults are GET,HEAD,OPTIONS,TRACE as per RFC7231. - invalidate cache after request per RFC2616 Section 13.10 before: `method not in ["GET", "HEAD"]` now: `method not in ["GET", "HEAD", "OPTIONS", "TRACE"]` and adjustable - 308 may be cached not by default as per RFC7538, but only with relevant headers https://github.com/httplib2/httplib2/issues/151 --- python2/httplib2/__init__.py | 45 ++++++++++++++++++------------- python3/httplib2/__init__.py | 38 +++++++++++++++++---------- tests/__init__.py | 2 ++ tests/test_http.py | 51 ++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 32 deletions(-) diff --git a/python2/httplib2/__init__.py b/python2/httplib2/__init__.py index c8302eb8..2bf1ac10 100644 --- a/python2/httplib2/__init__.py +++ b/python2/httplib2/__init__.py @@ -291,6 +291,9 @@ class NotRunningAppEngineEnvironment(HttpLib2Error): "upgrade", ] +# https://tools.ietf.org/html/rfc7231#section-8.1.3 +SAFE_METHODS = ("GET", "HEAD") # TODO add "OPTIONS", "TRACE" + def _get_end2end_headers(response): hopbyhop = list(HOP_BY_HOP) @@ -1175,9 +1178,9 @@ def connect(self): host = self.host port = self.port - + socket_err = None - + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res try: @@ -1353,9 +1356,9 @@ def connect(self): host = self.host port = self.port - + socket_err = None - + address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) for family, socktype, proto, canonname, sockaddr in address_info: try: @@ -1665,6 +1668,8 @@ def __init__( # which methods get an "if-match:" etag header added to them. self.optimistic_concurrency_methods = ["PUT", "PATCH"] + self.safe_methods = list(SAFE_METHODS) + # If 'follow_redirects' is True, and this is set to True then # all redirecs are followed, including unsafe ones. self.follow_all_redirects = False @@ -1858,10 +1863,10 @@ def _request( if ( self.follow_all_redirects - or (method in ["GET", "HEAD"]) - or response.status == 303 + or method in self.safe_methods + or response.status in (303, 308) ): - if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: + if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308): # Pick out the location header and basically start from the beginning # remembering first to strip the ETag header and decrement our 'depth' if redirections: @@ -1881,7 +1886,7 @@ def _request( response["location"] = urlparse.urljoin( absolute_uri, location ) - if response.status == 301 and method in ["GET", "HEAD"]: + if response.status == 308 or (response.status == 301 and method in self.safe_methods): response["-x-permanent-redirect-url"] = response["location"] if "content-location" not in response: response["content-location"] = absolute_uri @@ -1918,7 +1923,7 @@ def _request( response, content, ) - elif response.status in [200, 203] and method in ["GET", "HEAD"]: + elif response.status in [200, 203] and method in self.safe_methods: # Don't cache 206's since we aren't going to handle byte range requests if "content-location" not in response: response["content-location"] = absolute_uri @@ -2018,6 +2023,7 @@ def request( headers["accept-encoding"] = "gzip, deflate" info = email.Message.Message() + cachekey = None cached_value = None if self.cache: cachekey = defrag_uri.encode("utf-8") @@ -2038,8 +2044,6 @@ def request( self.cache.delete(cachekey) cachekey = None cached_value = None - else: - cachekey = None if ( method in self.optimistic_concurrency_methods @@ -2051,13 +2055,15 @@ def request( # http://www.w3.org/1999/04/Editing/ headers["if-match"] = info["etag"] - if method not in ["GET", "HEAD"] and self.cache and cachekey: - # RFC 2616 Section 13.10 + # https://tools.ietf.org/html/rfc7234 + # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location + # when a non-error status code is received in response to an unsafe request method. + if self.cache and cachekey and method not in self.safe_methods: self.cache.delete(cachekey) # Check the vary header in the cache to see if this request # matches what varies in the cache. - if method in ["GET", "HEAD"] and "vary" in info: + if method in self.safe_methods and "vary" in info: vary = info["vary"] vary_headers = vary.lower().replace(" ", "").split(",") for header in vary_headers: @@ -2068,11 +2074,14 @@ def request( break if ( - cached_value - and method in ["GET", "HEAD"] - and self.cache + self.cache + and cached_value + and (method in self.safe_methods or info["status"] == "308") and "range" not in headers ): + redirect_method = method + if info["status"] not in ("307", "308"): + redirect_method = "GET" if "-x-permanent-redirect-url" in info: # Should cached permanent redirects be counted in our redirection count? For now, yes. if redirections <= 0: @@ -2083,7 +2092,7 @@ def request( ) (response, new_content) = self.request( info["-x-permanent-redirect-url"], - method="GET", + method=redirect_method, headers=headers, redirections=redirections - 1, ) diff --git a/python3/httplib2/__init__.py b/python3/httplib2/__init__.py index d8c3d343..3854bcd2 100644 --- a/python3/httplib2/__init__.py +++ b/python3/httplib2/__init__.py @@ -161,6 +161,10 @@ class ProxiesUnavailableError(HttpLib2Error): "upgrade", ] +# https://tools.ietf.org/html/rfc7231#section-8.1.3 +SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE") + + from httplib2 import certs CA_CERTS = certs.where() @@ -1471,6 +1475,8 @@ def __init__( # which methods get an "if-match:" etag header added to them. self.optimistic_concurrency_methods = ["PUT", "PATCH"] + self.safe_methods = list(SAFE_METHODS) + # If 'follow_redirects' is True, and this is set to True then # all redirecs are followed, including unsafe ones. self.follow_all_redirects = False @@ -1663,10 +1669,10 @@ def _request( if ( self.follow_all_redirects - or (method in ["GET", "HEAD"]) - or response.status == 303 + or method in self.safe_methods + or response.status in (303, 308) ): - if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: + if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308): # Pick out the location header and basically start from the beginning # remembering first to strip the ETag header and decrement our 'depth' if redirections: @@ -1686,7 +1692,7 @@ def _request( response["location"] = urllib.parse.urljoin( absolute_uri, location ) - if response.status == 301 and method in ["GET", "HEAD"]: + if response.status == 308 or (response.status == 301 and (method in self.safe_methods)): response["-x-permanent-redirect-url"] = response["location"] if "content-location" not in response: response["content-location"] = absolute_uri @@ -1723,7 +1729,7 @@ def _request( response, content, ) - elif response.status in [200, 203] and method in ["GET", "HEAD"]: + elif response.status in [200, 203] and method in self.safe_methods: # Don't cache 206's since we aren't going to handle byte range requests if "content-location" not in response: response["content-location"] = absolute_uri @@ -1822,6 +1828,7 @@ def request( headers["accept-encoding"] = "gzip, deflate" info = email.message.Message() + cachekey = None cached_value = None if self.cache: cachekey = defrag_uri @@ -1839,8 +1846,6 @@ def request( self.cache.delete(cachekey) cachekey = None cached_value = None - else: - cachekey = None if ( method in self.optimistic_concurrency_methods @@ -1852,13 +1857,15 @@ def request( # http://www.w3.org/1999/04/Editing/ headers["if-match"] = info["etag"] - if method not in ["GET", "HEAD"] and self.cache and cachekey: - # RFC 2616 Section 13.10 + # https://tools.ietf.org/html/rfc7234 + # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location + # when a non-error status code is received in response to an unsafe request method. + if self.cache and cachekey and method not in self.safe_methods: self.cache.delete(cachekey) # Check the vary header in the cache to see if this request # matches what varies in the cache. - if method in ["GET", "HEAD"] and "vary" in info: + if method in self.safe_methods and "vary" in info: vary = info["vary"] vary_headers = vary.lower().replace(" ", "").split(",") for header in vary_headers: @@ -1869,11 +1876,14 @@ def request( break if ( - cached_value - and method in ["GET", "HEAD"] - and self.cache + self.cache + and cached_value + and (method in self.safe_methods or info["status"] == "308") and "range" not in headers ): + redirect_method = method + if info["status"] not in ("307", "308"): + redirect_method = "GET" if "-x-permanent-redirect-url" in info: # Should cached permanent redirects be counted in our redirection count? For now, yes. if redirections <= 0: @@ -1884,7 +1894,7 @@ def request( ) (response, new_content) = self.request( info["-x-permanent-redirect-url"], - method="GET", + method=redirect_method, headers=headers, redirections=redirections - 1, ) diff --git a/tests/__init__.py b/tests/__init__.py index 496652bd..a15db9ed 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -406,10 +406,12 @@ def server_request_socket_handler(sock, tick): request = HttpRequest.from_buffered(buf) if request is None: break + # print("--- debug request\n" + request.raw.decode("ascii", "replace")) i += 1 request.client_sock = sock request.number = i response = request_handler(request=request) + # print("--- debug response\n" + response.decode("ascii", "replace")) sock.sendall(response) request.client_sock = None if not tick(request): diff --git a/tests/test_http.py b/tests/test_http.py index 97b52dc2..641a647c 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -622,6 +622,57 @@ def test_get_307(): assert content == b"final content\n" +def test_post_307(): + # 307: follow with same method + http = httplib2.Http(cache=tests.get_cache_path(), timeout=1) + http.follow_all_redirects = True + r307 = tests.http_response_bytes(status=307, headers={"location": "/final"}) + r200 = tests.http_response_bytes(status=200, body=b"final content\n") + + with tests.server_list_http([r307, r200, r307, r200]) as uri: + response, content = http.request(uri, "POST") + assert response.previous.status == 307 + assert not response.previous.fromcache + assert response.status == 200 + assert not response.fromcache + assert content == b"final content\n" + + response, content = http.request(uri, "POST") + assert response.previous.status == 307 + assert not response.previous.fromcache + assert response.status == 200 + assert not response.fromcache + assert content == b"final content\n" + + +def test_change_308(): + # 308: follow with same method, cache redirect + http = httplib2.Http(cache=tests.get_cache_path(), timeout=1) + routes = { + "/final": tests.make_http_reflect(), + "": tests.http_response_bytes( + status="308 Permanent Redirect", + add_date=True, + headers={"cache-control": "max-age=300", "location": "/final"}, + ), + } + + with tests.server_route(routes, request_count=3) as uri: + response, content = http.request(uri, "CHANGE", body=b"hello308") + assert response.previous.status == 308 + assert not response.previous.fromcache + assert response.status == 200 + assert not response.fromcache + assert content.startswith(b"CHANGE /final HTTP") + + response, content = http.request(uri, "CHANGE") + assert response.previous.status == 308 + assert response.previous.fromcache + assert response.status == 200 + assert not response.fromcache + assert content.startswith(b"CHANGE /final HTTP") + + def test_get_410(): # Test that we pass 410's through http = httplib2.Http()