Skip to content

Commit

Permalink
IMPORTANT cache invalidation change, fix 307 keep method, add 308 red…
Browse files Browse the repository at this point in the history
…irect

- support 308 permanent redirect with same method
- Http() new attribute, `safe_methods` is public API,
  type is anything with `contains` operator, tuple is recommended.
  Defaults are GET,HEAD,OPTIONS,TRACE as per RFC7231.
- invalidate cache after request per RFC2616 Section 13.10
  before: `method not in ["GET", "HEAD"]`
  now: `method not in ["GET", "HEAD", "OPTIONS", "TRACE"]` and adjustable
- 308 may be cached not by default as per RFC7538, but only with
  relevant headers

#151
  • Loading branch information
temoto committed Dec 19, 2019
1 parent 095494d commit 75d9915
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 32 deletions.
45 changes: 27 additions & 18 deletions python2/httplib2/__init__.py
Expand Up @@ -291,6 +291,9 @@ class NotRunningAppEngineEnvironment(HttpLib2Error):
"upgrade",
]

# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD") # TODO add "OPTIONS", "TRACE"


def _get_end2end_headers(response):
hopbyhop = list(HOP_BY_HOP)
Expand Down Expand Up @@ -1175,9 +1178,9 @@ def connect(self):

host = self.host
port = self.port

socket_err = None

for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
Expand Down Expand Up @@ -1353,9 +1356,9 @@ def connect(self):

host = self.host
port = self.port

socket_err = None

address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
for family, socktype, proto, canonname, sockaddr in address_info:
try:
Expand Down Expand Up @@ -1665,6 +1668,8 @@ def __init__(
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT", "PATCH"]

self.safe_methods = list(SAFE_METHODS)

# If 'follow_redirects' is True, and this is set to True then
# all redirecs are followed, including unsafe ones.
self.follow_all_redirects = False
Expand Down Expand Up @@ -1858,10 +1863,10 @@ def _request(

if (
self.follow_all_redirects
or (method in ["GET", "HEAD"])
or response.status == 303
or method in self.safe_methods
or response.status in (303, 308)
):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308):
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
Expand All @@ -1881,7 +1886,7 @@ def _request(
response["location"] = urlparse.urljoin(
absolute_uri, location
)
if response.status == 301 and method in ["GET", "HEAD"]:
if response.status == 308 or (response.status == 301 and method in self.safe_methods):
response["-x-permanent-redirect-url"] = response["location"]
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1918,7 +1923,7 @@ def _request(
response,
content,
)
elif response.status in [200, 203] and method in ["GET", "HEAD"]:
elif response.status in [200, 203] and method in self.safe_methods:
# Don't cache 206's since we aren't going to handle byte range requests
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -2018,6 +2023,7 @@ def request(
headers["accept-encoding"] = "gzip, deflate"

info = email.Message.Message()
cachekey = None
cached_value = None
if self.cache:
cachekey = defrag_uri.encode("utf-8")
Expand All @@ -2038,8 +2044,6 @@ def request(
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None

if (
method in self.optimistic_concurrency_methods
Expand All @@ -2051,13 +2055,15 @@ def request(
# http://www.w3.org/1999/04/Editing/
headers["if-match"] = info["etag"]

if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
# https://tools.ietf.org/html/rfc7234
# A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
# when a non-error status code is received in response to an unsafe request method.
if self.cache and cachekey and method not in self.safe_methods:
self.cache.delete(cachekey)

# Check the vary header in the cache to see if this request
# matches what varies in the cache.
if method in ["GET", "HEAD"] and "vary" in info:
if method in self.safe_methods and "vary" in info:
vary = info["vary"]
vary_headers = vary.lower().replace(" ", "").split(",")
for header in vary_headers:
Expand All @@ -2068,11 +2074,14 @@ def request(
break

if (
cached_value
and method in ["GET", "HEAD"]
and self.cache
self.cache
and cached_value
and (method in self.safe_methods or info["status"] == "308")
and "range" not in headers
):
redirect_method = method
if info["status"] not in ("307", "308"):
redirect_method = "GET"
if "-x-permanent-redirect-url" in info:
# Should cached permanent redirects be counted in our redirection count? For now, yes.
if redirections <= 0:
Expand All @@ -2083,7 +2092,7 @@ def request(
)
(response, new_content) = self.request(
info["-x-permanent-redirect-url"],
method="GET",
method=redirect_method,
headers=headers,
redirections=redirections - 1,
)
Expand Down
38 changes: 24 additions & 14 deletions python3/httplib2/__init__.py
Expand Up @@ -161,6 +161,10 @@ class ProxiesUnavailableError(HttpLib2Error):
"upgrade",
]

# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")


from httplib2 import certs
CA_CERTS = certs.where()

Expand Down Expand Up @@ -1471,6 +1475,8 @@ def __init__(
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT", "PATCH"]

self.safe_methods = list(SAFE_METHODS)

# If 'follow_redirects' is True, and this is set to True then
# all redirecs are followed, including unsafe ones.
self.follow_all_redirects = False
Expand Down Expand Up @@ -1663,10 +1669,10 @@ def _request(

if (
self.follow_all_redirects
or (method in ["GET", "HEAD"])
or response.status == 303
or method in self.safe_methods
or response.status in (303, 308)
):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
if self.follow_redirects and response.status in (300, 301, 302, 303, 307, 308):
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
Expand All @@ -1686,7 +1692,7 @@ def _request(
response["location"] = urllib.parse.urljoin(
absolute_uri, location
)
if response.status == 301 and method in ["GET", "HEAD"]:
if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
response["-x-permanent-redirect-url"] = response["location"]
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1723,7 +1729,7 @@ def _request(
response,
content,
)
elif response.status in [200, 203] and method in ["GET", "HEAD"]:
elif response.status in [200, 203] and method in self.safe_methods:
# Don't cache 206's since we aren't going to handle byte range requests
if "content-location" not in response:
response["content-location"] = absolute_uri
Expand Down Expand Up @@ -1822,6 +1828,7 @@ def request(
headers["accept-encoding"] = "gzip, deflate"

info = email.message.Message()
cachekey = None
cached_value = None
if self.cache:
cachekey = defrag_uri
Expand All @@ -1839,8 +1846,6 @@ def request(
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None

if (
method in self.optimistic_concurrency_methods
Expand All @@ -1852,13 +1857,15 @@ def request(
# http://www.w3.org/1999/04/Editing/
headers["if-match"] = info["etag"]

if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
# https://tools.ietf.org/html/rfc7234
# A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
# when a non-error status code is received in response to an unsafe request method.
if self.cache and cachekey and method not in self.safe_methods:
self.cache.delete(cachekey)

# Check the vary header in the cache to see if this request
# matches what varies in the cache.
if method in ["GET", "HEAD"] and "vary" in info:
if method in self.safe_methods and "vary" in info:
vary = info["vary"]
vary_headers = vary.lower().replace(" ", "").split(",")
for header in vary_headers:
Expand All @@ -1869,11 +1876,14 @@ def request(
break

if (
cached_value
and method in ["GET", "HEAD"]
and self.cache
self.cache
and cached_value
and (method in self.safe_methods or info["status"] == "308")
and "range" not in headers
):
redirect_method = method
if info["status"] not in ("307", "308"):
redirect_method = "GET"
if "-x-permanent-redirect-url" in info:
# Should cached permanent redirects be counted in our redirection count? For now, yes.
if redirections <= 0:
Expand All @@ -1884,7 +1894,7 @@ def request(
)
(response, new_content) = self.request(
info["-x-permanent-redirect-url"],
method="GET",
method=redirect_method,
headers=headers,
redirections=redirections - 1,
)
Expand Down
2 changes: 2 additions & 0 deletions tests/__init__.py
Expand Up @@ -406,10 +406,12 @@ def server_request_socket_handler(sock, tick):
request = HttpRequest.from_buffered(buf)
if request is None:
break
# print("--- debug request\n" + request.raw.decode("ascii", "replace"))
i += 1
request.client_sock = sock
request.number = i
response = request_handler(request=request)
# print("--- debug response\n" + response.decode("ascii", "replace"))
sock.sendall(response)
request.client_sock = None
if not tick(request):
Expand Down
51 changes: 51 additions & 0 deletions tests/test_http.py
Expand Up @@ -622,6 +622,57 @@ def test_get_307():
assert content == b"final content\n"


def test_post_307():
# 307: follow with same method
http = httplib2.Http(cache=tests.get_cache_path(), timeout=1)
http.follow_all_redirects = True
r307 = tests.http_response_bytes(status=307, headers={"location": "/final"})
r200 = tests.http_response_bytes(status=200, body=b"final content\n")

with tests.server_list_http([r307, r200, r307, r200]) as uri:
response, content = http.request(uri, "POST")
assert response.previous.status == 307
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content == b"final content\n"

response, content = http.request(uri, "POST")
assert response.previous.status == 307
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content == b"final content\n"


def test_change_308():
# 308: follow with same method, cache redirect
http = httplib2.Http(cache=tests.get_cache_path(), timeout=1)
routes = {
"/final": tests.make_http_reflect(),
"": tests.http_response_bytes(
status="308 Permanent Redirect",
add_date=True,
headers={"cache-control": "max-age=300", "location": "/final"},
),
}

with tests.server_route(routes, request_count=3) as uri:
response, content = http.request(uri, "CHANGE", body=b"hello308")
assert response.previous.status == 308
assert not response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content.startswith(b"CHANGE /final HTTP")

response, content = http.request(uri, "CHANGE")
assert response.previous.status == 308
assert response.previous.fromcache
assert response.status == 200
assert not response.fromcache
assert content.startswith(b"CHANGE /final HTTP")


def test_get_410():
# Test that we pass 410's through
http = httplib2.Http()
Expand Down

0 comments on commit 75d9915

Please sign in to comment.