Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate File When Requested #2526

Merged
merged 23 commits into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 50 additions & 9 deletions sanic/response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from datetime import datetime
from email.utils import formatdate
from datetime import datetime, timezone
from email.utils import formatdate, parsedate_to_datetime
from functools import partial
from mimetypes import guess_type
from os import path
Expand Down Expand Up @@ -33,6 +33,7 @@
remove_entity_headers,
)
from sanic.http import Http
from sanic.log import logger
from sanic.models.protocol_types import HTMLProtocol, Range


Expand Down Expand Up @@ -319,9 +320,34 @@ def html(
)


async def validate_file(
    request_headers: Header, last_modified: Union[datetime, float, int]
) -> Optional[HTTPResponse]:
    """Answer a conditional request based on ``If-Modified-Since``.

    :param request_headers: Headers of the incoming request. Only the
        ``If-Modified-Since`` header is read.
    :param last_modified: Modification time of the resource, either a
        ``datetime`` or a POSIX timestamp (seconds since the epoch).
    :return: An empty ``304`` response when the resource has not been
        modified after the time sent by the client. ``None`` when the
        header is absent, cannot be parsed, or the resource is newer, in
        which case the caller should send the full response.
    """
    try:
        raw_value = request_headers.getone("If-Modified-Since")
    except KeyError:
        # Not a conditional request.
        return None

    try:
        if_modified_since = parsedate_to_datetime(raw_value)
    except (TypeError, ValueError):
        # An unparsable date is ignored rather than rejected, so the
        # client simply receives the full response.
        logger.warning(
            "Ignoring invalid If-Modified-Since header received: '%s'",
            raw_value,
        )
        return None

    if not isinstance(last_modified, datetime):
        last_modified = datetime.fromtimestamp(
            float(last_modified), tz=timezone.utc
        )
    # HTTP dates carry whole seconds only, so sub-second precision must be
    # dropped or an unchanged resource would always look newer.
    last_modified = last_modified.replace(microsecond=0)

    # parsedate_to_datetime() yields a naive datetime for a "-0000" zone.
    # Ordering a naive datetime against an aware one raises TypeError, so
    # naive values are interpreted as UTC before comparing.
    if if_modified_since.utcoffset() is None:
        if_modified_since = if_modified_since.replace(tzinfo=timezone.utc)
    if last_modified.utcoffset() is None:
        # NOTE(review): a naive ``last_modified`` is assumed to be UTC
        # here -- confirm this matches what callers pass in.
        last_modified = last_modified.replace(tzinfo=timezone.utc)

    if last_modified <= if_modified_since:
        return HTTPResponse(status=304)
    return None


async def file(
location: Union[str, PurePath],
status: int = 200,
request_headers: Optional[Header] = None,
validate_when_requested: bool = True,
ChihweiLHBird marked this conversation as resolved.
Show resolved Hide resolved
mime_type: Optional[str] = None,
headers: Optional[Dict[str, str]] = None,
filename: Optional[str] = None,
Expand All @@ -331,7 +357,12 @@ async def file(
_range: Optional[Range] = None,
) -> HTTPResponse:
"""Return a response object with file data.

:param status: HTTP response code. Won't enforce the passed in
status if only a part of the content will be sent (206)
or file is being validated (304).
:param request_headers: The request headers.
:param validate_when_requested: If True, will validate the
file when requested.
:param location: Location of file on system.
:param mime_type: Specific mime_type.
:param headers: Custom Headers.
Expand All @@ -341,21 +372,31 @@ async def file(
:param no_store: Any cache should not store this response.
:param _range:
"""
headers = headers or {}
if filename:
headers.setdefault(
"Content-Disposition", f'attachment; filename="{filename}"'
)

if isinstance(last_modified, datetime):
last_modified = last_modified.replace(microsecond=0).timestamp()
elif isinstance(last_modified, Default):
stat = await stat_async(location)
last_modified = stat.st_mtime

if (
validate_when_requested
and request_headers is not None
and last_modified
):
response = await validate_file(request_headers, last_modified)
if response:
return response

headers = headers or {}
if last_modified:
headers.setdefault(
"last-modified", formatdate(last_modified, usegmt=True)
"Last-Modified", formatdate(last_modified, usegmt=True)
)

if filename:
headers.setdefault(
"Content-Disposition", f'attachment; filename="{filename}"'
)

if no_store:
Expand Down
145 changes: 134 additions & 11 deletions tests/test_response.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import inspect
import os
import time

from collections import namedtuple
from datetime import datetime
Expand Down Expand Up @@ -730,27 +731,37 @@ def test_file_response_headers(
test_expires = test_last_modified.timestamp() + test_max_age

@app.route("/files/cached/<filename>", methods=["GET"])
def file_route_cache(request, filename):
file_path = (Path(static_file_directory) / file_name).absolute()
def file_route_cache(request: Request, filename: str):
file_path = (
Path(static_file_directory) / unquote(filename)
).absolute()
return file(
file_path, max_age=test_max_age, last_modified=test_last_modified
)

@app.route(
"/files/cached_default_last_modified/<filename>", methods=["GET"]
)
def file_route_cache_default_last_modified(request, filename):
file_path = (Path(static_file_directory) / file_name).absolute()
def file_route_cache_default_last_modified(
request: Request, filename: str
):
file_path = (
Path(static_file_directory) / unquote(filename)
).absolute()
return file(file_path, max_age=test_max_age)

@app.route("/files/no_cache/<filename>", methods=["GET"])
def file_route_no_cache(request, filename):
file_path = (Path(static_file_directory) / file_name).absolute()
def file_route_no_cache(request: Request, filename: str):
file_path = (
Path(static_file_directory) / unquote(filename)
).absolute()
return file(file_path)

@app.route("/files/no_store/<filename>", methods=["GET"])
def file_route_no_store(request, filename):
file_path = (Path(static_file_directory) / file_name).absolute()
def file_route_no_store(request: Request, filename: str):
file_path = (
Path(static_file_directory) / unquote(filename)
).absolute()
return file(file_path, no_store=True)

_, response = app.test_client.get(f"/files/cached/{file_name}")
Expand All @@ -767,11 +778,11 @@ def file_route_no_store(request, filename):
== formatdate(test_expires, usegmt=True)[:-6]
# [:-6] to allow at most 1 min difference
# It's minimal for cases like:
# Thu, 26 May 2022 05:36:49 GMT
# Thu, 26 May 2022 05:36:59 GMT
# AND
# Thu, 26 May 2022 05:36:50 GMT
# Thu, 26 May 2022 05:37:00 GMT
)

assert response.status == 200
assert "last-modified" in headers and headers.get(
"last-modified"
) == formatdate(test_last_modified.timestamp(), usegmt=True)
Expand All @@ -786,15 +797,127 @@ def file_route_no_store(request, filename):
assert "last-modified" in headers and headers.get(
"last-modified"
) == formatdate(file_last_modified, usegmt=True)
assert response.status == 200

_, response = app.test_client.get(f"/files/no_cache/{file_name}")
headers = response.headers
assert "cache-control" in headers and f"no-cache" == headers.get(
"cache-control"
)
assert response.status == 200

_, response = app.test_client.get(f"/files/no_store/{file_name}")
headers = response.headers
assert "cache-control" in headers and f"no-store" == headers.get(
"cache-control"
)
assert response.status == 200


def test_file_validate(app: Sanic, static_file_directory: str):
    """Check ``If-Modified-Since`` validation against a file that changes.

    The file is served once, modified, requested again with the stale
    ``Last-Modified`` value (expecting a full 200 response) and finally
    requested with the fresh value (expecting an empty 304 response).
    """
    file_name = "test_validate.txt"
    static_file_directory = Path(static_file_directory)
    file_path = static_file_directory / file_name
    file_path = file_path.absolute()
    test_max_age = 10

    with open(file_path, "w+") as f:
        f.write("foo\n")

    # Everything after the file is created runs under try/finally so the
    # temporary file is removed even when an assertion fails.
    try:

        @app.route("/validate", methods=["GET"])
        def file_route_cache(request: Request):
            return file(
                file_path,
                request_headers=request.headers,
                max_age=test_max_age,
                validate_when_requested=True,
            )

        _, response = app.test_client.get("/validate")
        assert response.status == 200
        assert response.body == b"foo\n"
        last_modified = response.headers["Last-Modified"]

        # The Last-Modified header has one-second resolution, so wait
        # before modifying the file to guarantee a later header value.
        time.sleep(1)
        with open(file_path, "a") as f:
            f.write("bar\n")

        _, response = app.test_client.get(
            "/validate", headers={"If-Modified-Since": last_modified}
        )
        assert response.status == 200
        assert response.body == b"foo\nbar\n"

        last_modified = response.headers["Last-Modified"]
        _, response = app.test_client.get(
            "/validate", headers={"if-modified-since": last_modified}
        )
        assert response.status == 304
        assert response.body == b""
    finally:
        file_path.unlink()


@pytest.mark.parametrize(
    "file_name", ["test.file", "decode me.txt", "python.png"]
)
def test_file_validating_invalid_header(
    app: Sanic, file_name: str, static_file_directory: str
):
    """An unparsable ``If-Modified-Since`` value must not block the file.

    The file is requested without the header, with a malformed value and
    with an empty value; each request expects the full 200 response.
    """
    url = f"/files/{file_name}"

    @app.route("/files/<filename>", methods=["GET"])
    def file_route(request: Request, filename: str):
        target = Path(static_file_directory, unquote(filename)).absolute()
        return file(
            target,
            request_headers=request.headers,
            validate_when_requested=True,
        )

    # Baseline: a plain request without any conditional header.
    _, response = app.test_client.get(url)
    assert response.status == 200
    assert response.body == get_file_content(static_file_directory, file_name)

    # Header values that cannot be parsed as an HTTP date.
    for bad_value in ("invalid-value", ""):
        _, response = app.test_client.get(
            url, headers={"if-modified-since": bad_value}
        )
        assert response.status == 200
        assert response.body == get_file_content(
            static_file_directory, file_name
        )


@pytest.mark.parametrize(
    "file_name", ["test.file", "decode me.txt", "python.png"]
)
def test_file_validating_304_response(
    app: Sanic, file_name: str, static_file_directory: str
):
    """Echoing ``Last-Modified`` back must yield an empty 304 response."""
    url = f"/files/{file_name}"

    @app.route("/files/<filename>", methods=["GET"])
    def file_route(request: Request, filename: str):
        target = Path(static_file_directory, unquote(filename)).absolute()
        return file(
            target,
            request_headers=request.headers,
            validate_when_requested=True,
        )

    # First request: full content, and the header value to echo back.
    _, response = app.test_client.get(url)
    assert response.status == 200
    assert response.body == get_file_content(static_file_directory, file_name)

    # Second request: send that value back as the validator.
    conditional_headers = {
        "if-modified-since": response.headers["Last-Modified"]
    }
    _, response = app.test_client.get(url, headers=conditional_headers)
    assert response.status == 304
    assert response.body == b""