From c94d399db023af11a24fff3f5146fadadbfcd0bd Mon Sep 17 00:00:00 2001 From: prryplatypus <25409753+prryplatypus@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:01:08 +0200 Subject: [PATCH 01/14] Check unquoted URLs for directory traversing --- sanic/mixins/routes.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 425b29e0ba..e0b8e11b95 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -5,7 +5,6 @@ from mimetypes import guess_type from os import path from pathlib import PurePath -from re import sub from textwrap import dedent from time import gmtime, strftime from typing import ( @@ -806,23 +805,25 @@ async def _static_request_handler( content_type=None, __file_uri__=None, ): - # Using this to determine if the URL is trying to break out of the path - # served. os.path.realpath seems to be very slow - if __file_uri__ and "../" in __file_uri__: - raise BadRequest("Invalid URL") # Merge served directory and requested file if provided # Strip all / that in the beginning of the URL to help prevent python # from herping a derp and treating the uri as an absolute path - root_path = file_path = file_or_directory + root_path = file_path = unquote(file_or_directory) + if __file_uri__: + unquoted_file_uri = unquote(__file_uri__) file_path = path.join( - file_or_directory, sub("^[/]*", "", __file_uri__) + file_or_directory, unquoted_file_uri.lstrip("/") ) + pure_file_path = PurePath(file_path) + if any(part == ".." for part in pure_file_path.parts): + raise BadRequest("Invalid URL") + # URL decode the path sent by the browser otherwise we won't be able to # match filenames which got encoded (filenames with spaces etc) - file_path = path.abspath(unquote(file_path)) - if not file_path.startswith(path.abspath(unquote(root_path))): + file_path = path.abspath(file_path) + if not file_path.startswith(path.abspath(root_path)): error_logger.exception( f"File not found: path={file_or_directory}, " f"relative_url={__file_uri__}" From 34a964b0e321ce9751dc20450ffd37eda457f1b7 Mon Sep 17 00:00:00 2001 From: prryplatypus <25409753+prryplatypus@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:12:59 +0200 Subject: [PATCH 02/14] Only check for traversal in `__file_uri__` --- sanic/mixins/routes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index e0b8e11b95..de1335c00d 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -811,15 +811,16 @@ async def _static_request_handler( root_path = file_path = unquote(file_or_directory) if __file_uri__: - unquoted_file_uri = unquote(__file_uri__) + unquoted_file_uri = unquote(__file_uri__).lstrip("/") + + file_uri_pp = PurePath(file_path) + if any(part == ".." for part in file_uri_pp.parts): + raise BadRequest("Invalid URL") + file_path = path.join( - file_or_directory, unquoted_file_uri.lstrip("/") + file_or_directory, unquoted_file_uri ) - pure_file_path = PurePath(file_path) - if any(part == ".." for part in pure_file_path.parts): - raise BadRequest("Invalid URL") - # URL decode the path sent by the browser otherwise we won't be able to # match filenames which got encoded (filenames with spaces etc) file_path = path.abspath(file_path) From bb9da4f5f43286df9ce35b26053871c8aece20db Mon Sep 17 00:00:00 2001 From: prryplatypus <25409753+prryplatypus@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:22:05 +0200 Subject: [PATCH 03/14] Actually use the correct variable --- sanic/mixins/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index de1335c00d..1f789838e7 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -813,7 +813,7 @@ async def _static_request_handler( if __file_uri__: unquoted_file_uri = unquote(__file_uri__).lstrip("/") - file_uri_pp = PurePath(file_path) + file_uri_pp = PurePath(unquoted_file_uri) if any(part == ".." for part in file_uri_pp.parts): raise BadRequest("Invalid URL") From 95aceb271de2895200a4c68de345b0a45dcf98b4 Mon Sep 17 00:00:00 2001 From: prryplatypus <25409753+prryplatypus@users.noreply.github.com> Date: Mon, 18 Jul 2022 07:19:54 +0200 Subject: [PATCH 04/14] Better performing traversal check --- sanic/mixins/routes.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 1f789838e7..18a05414a8 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -806,23 +806,21 @@ async def _static_request_handler( __file_uri__=None, ): # Merge served directory and requested file if provided - # Strip all / that in the beginning of the URL to help prevent python - # from herping a derp and treating the uri as an absolute path root_path = file_path = unquote(file_or_directory) if __file_uri__: + # Strip all / that in the beginning of the URL to help prevent python + # from herping a derp and treating the uri as an absolute path unquoted_file_uri = unquote(__file_uri__).lstrip("/") - file_uri_pp = PurePath(unquoted_file_uri) - if any(part == ".." for part in file_uri_pp.parts): + segments = unquoted_file_uri.split("/") + if any(segment == ".." for segment in segments): raise BadRequest("Invalid URL") file_path = path.join( file_or_directory, unquoted_file_uri ) - # URL decode the path sent by the browser otherwise we won't be able to - # match filenames which got encoded (filenames with spaces etc) file_path = path.abspath(file_path) if not file_path.startswith(path.abspath(root_path)): error_logger.exception( From 73b1a1cc610b9d9624d068f85d57eb05ec735156 Mon Sep 17 00:00:00 2001 From: Adam Hopkins Date: Tue, 26 Jul 2022 21:59:19 +0300 Subject: [PATCH 05/14] Add tests --- sanic/mixins/routes.py | 11 +++++------ tests/test_static.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 18a05414a8..9ded28a5b9 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -809,17 +809,16 @@ async def _static_request_handler( root_path = file_path = unquote(file_or_directory) if __file_uri__: - # Strip all / that in the beginning of the URL to help prevent python - # from herping a derp and treating the uri as an absolute path + # Strip all / that in the beginning of the URL to help prevent + # python from herping a derp and treating the uri as an + # absolute path unquoted_file_uri = unquote(__file_uri__).lstrip("/") segments = unquoted_file_uri.split("/") - if any(segment == ".." for segment in segments): + if ".." in segments: raise BadRequest("Invalid URL") - file_path = path.join( - file_or_directory, unquoted_file_uri - ) + file_path = path.join(file_or_directory, unquoted_file_uri) file_path = path.abspath(file_path) if not file_path.startswith(path.abspath(root_path)): diff --git a/tests/test_static.py b/tests/test_static.py index 36a98e114f..8e2388055c 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -578,3 +578,18 @@ def test_resource_type_dir(app, static_file_directory): def test_resource_type_unknown(app, static_file_directory, caplog): with pytest.raises(ValueError): app.static("/static", static_file_directory, resource_type="unknown") + + +def test_dotted_dir_ok(app, static_file_directory): + app.static("/foo", static_file_directory) + + _, response = app.test_client.get("/foo/dotted../dot.txt") + assert response.status == 200 + assert response.body == b"DOT\n" + + +def test_breakout(app, static_file_directory): + app.static("/foo", static_file_directory) + + _, response = app.test_client.get("/foo/..%2Fstatic/test.file") + assert response.status == 400 From 7ac9868740eb3deb7744d1044f82f76d4896a178 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Tue, 26 Jul 2022 18:48:00 -0500 Subject: [PATCH 06/14] Fix test case --- tests/static/dotted../dot.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/static/dotted../dot.txt diff --git a/tests/static/dotted../dot.txt b/tests/static/dotted../dot.txt new file mode 100644 index 0000000000..9d3a85d448 --- /dev/null +++ b/tests/static/dotted../dot.txt @@ -0,0 +1 @@ +DOT From 6ed4e63d0ddf814ecfae5808f7a7ad7484395458 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Tue, 26 Jul 2022 20:45:11 -0500 Subject: [PATCH 07/14] Fix test on win32 --- tests/test_static.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_static.py b/tests/test_static.py index 8e2388055c..75840b30b8 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -1,6 +1,7 @@ import inspect import logging import os +import sys from collections import Counter from pathlib import Path @@ -8,7 +9,7 @@ import pytest -from sanic import text +from sanic import Sanic, text from sanic.exceptions import FileNotFound @@ -21,6 +22,19 @@ def static_file_directory(): return static_directory +@pytest.fixture(scope="module") +def double_dotted_directory_file(static_file_directory: str): + """Generate double dotted directory and its files""" + if sys.platform == "win32": + raise Exception("Windows doesn't support double dotted directories") + + file_path = Path(static_file_directory) / "dotted.." / "dot.txt" + Path.mkdir(file_path.parent, exist_ok=True) + with open(file_path, "w") as f: + f.write("DOT\n") + yield file_path + + def get_file_path(static_file_directory, file_name): return os.path.join(static_file_directory, file_name) @@ -580,15 +594,23 @@ def test_resource_type_unknown(app, static_file_directory, caplog): app.static("/static", static_file_directory, resource_type="unknown") -def test_dotted_dir_ok(app, static_file_directory): +@pytest.skipif( + sys.platform == "win32", + reason="Windows does not support double dotted directories", +) +def test_dotted_dir_ok( + app: Sanic, static_file_directory: str, double_dotted_directory_file: Path +): app.static("/foo", static_file_directory) - - _, response = app.test_client.get("/foo/dotted../dot.txt") + double_dotted_directory_file = double_dotted_directory_file.lstrip( + static_file_directory + ) + _, response = app.test_client.get("/foo/" + double_dotted_directory_file) assert response.status == 200 assert response.body == b"DOT\n" -def test_breakout(app, static_file_directory): +def test_breakout(app: Sanic, static_file_directory: str): app.static("/foo", static_file_directory) _, response = app.test_client.get("/foo/..%2Fstatic/test.file") From 11617f1af2ea1ded015024d604c591ddf0d62817 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Tue, 26 Jul 2022 20:48:23 -0500 Subject: [PATCH 08/14] Fix test on win32 (2) --- tests/static/dotted../dot.txt | 1 - tests/test_static.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 tests/static/dotted../dot.txt diff --git a/tests/static/dotted../dot.txt b/tests/static/dotted../dot.txt deleted file mode 100644 index 9d3a85d448..0000000000 --- a/tests/static/dotted../dot.txt +++ /dev/null @@ -1 +0,0 @@ -DOT diff --git a/tests/test_static.py b/tests/test_static.py index 75840b30b8..dc57a9a669 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -594,7 +594,7 @@ def test_resource_type_unknown(app, static_file_directory, caplog): app.static("/static", static_file_directory, resource_type="unknown") -@pytest.skipif( +@pytest.mark.skipif( sys.platform == "win32", reason="Windows does not support double dotted directories", ) @@ -602,7 +602,7 @@ def test_dotted_dir_ok( app: Sanic, static_file_directory: str, double_dotted_directory_file: Path ): app.static("/foo", static_file_directory) - double_dotted_directory_file = double_dotted_directory_file.lstrip( + double_dotted_directory_file = str(double_dotted_directory_file).lstrip( static_file_directory ) _, response = app.test_client.get("/foo/" + double_dotted_directory_file) From 297980e2ca9a0c08385924c4a855c89aee0672a0 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Tue, 26 Jul 2022 20:51:06 -0500 Subject: [PATCH 09/14] Generated static files cleanup --- tests/test_static.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_static.py b/tests/test_static.py index dc57a9a669..f0471c25de 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -29,10 +29,13 @@ def double_dotted_directory_file(static_file_directory: str): raise Exception("Windows doesn't support double dotted directories") file_path = Path(static_file_directory) / "dotted.." / "dot.txt" - Path.mkdir(file_path.parent, exist_ok=True) + double_dotted_dir = file_path.parent + Path.mkdir(double_dotted_dir, exist_ok=True) with open(file_path, "w") as f: f.write("DOT\n") yield file_path + Path.unlink(file_path) + Path.rmdir(double_dotted_dir) def get_file_path(static_file_directory, file_name): From 9e463eecb479cd1c572a8026df968d5ec2551e50 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Wed, 27 Jul 2022 00:12:56 -0500 Subject: [PATCH 10/14] Optimized logic --- sanic/mixins/routes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 9ded28a5b9..57f3a03bf3 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -806,7 +806,7 @@ async def _static_request_handler( __file_uri__=None, ): # Merge served directory and requested file if provided - root_path = file_path = unquote(file_or_directory) + root_path = file_path = path.abspath(unquote(file_or_directory)) if __file_uri__: # Strip all / that in the beginning of the URL to help prevent @@ -819,9 +819,9 @@ async def _static_request_handler( raise BadRequest("Invalid URL") file_path = path.join(file_or_directory, unquoted_file_uri) + file_path = path.abspath(file_path) - file_path = path.abspath(file_path) - if not file_path.startswith(path.abspath(root_path)): + if not file_path.startswith(root_path): error_logger.exception( f"File not found: path={file_or_directory}, " f"relative_url={__file_uri__}" From 76ec4d3ff23cc3c18822812c1ff511c9b85d5ee1 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Wed, 27 Jul 2022 01:21:51 -0500 Subject: [PATCH 11/14] Prohibit backslash in static file path for win32 --- sanic/mixins/routes.py | 4 ++-- tests/test_static.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 57f3a03bf3..8a5f10cc9c 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -3,7 +3,7 @@ from functools import partial, wraps from inspect import getsource, signature from mimetypes import guess_type -from os import path +from os import path, sep from pathlib import PurePath from textwrap import dedent from time import gmtime, strftime @@ -815,7 +815,7 @@ async def _static_request_handler( unquoted_file_uri = unquote(__file_uri__).lstrip("/") segments = unquoted_file_uri.split("/") - if ".." in segments: + if ".." in segments or any(sep in segment for segment in segments): raise BadRequest("Invalid URL") file_path = path.join(file_or_directory, unquoted_file_uri) diff --git a/tests/test_static.py b/tests/test_static.py index f0471c25de..cf2d12c5b1 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -618,3 +618,13 @@ def test_breakout(app: Sanic, static_file_directory: str): _, response = app.test_client.get("/foo/..%2Fstatic/test.file") assert response.status == 400 + + +@pytest.mark.skipif(sys.platform != "win32") +def test_double_backslash_prohibited_on_win32(app: Sanic, static_file_directory: str): + app.static("/foo", static_file_directory) + + _, response = app.test_client.get("/foo/static/..\\static/test.file") + assert response.status == 400 + _, response = app.test_client.get("/foo/static\\../static/test.file") + assert response.status == 400 From 448e759a4c93360bdf7bc7076d5f462ca3f662e2 Mon Sep 17 00:00:00 2001 From: Zhiwei Liang Date: Wed, 27 Jul 2022 01:28:39 -0500 Subject: [PATCH 12/14] Add skip reason --- tests/test_static.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_static.py b/tests/test_static.py index cf2d12c5b1..fd33ee03f3 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -620,8 +620,12 @@ def test_breakout(app: Sanic, static_file_directory: str): assert response.status == 400 -@pytest.mark.skipif(sys.platform != "win32") -def test_double_backslash_prohibited_on_win32(app: Sanic, static_file_directory: str): +@pytest.mark.skipif( + sys.platform != "win32", reason="Block backslash on Windows only" +) +def test_double_backslash_prohibited_on_win32( + app: Sanic, static_file_directory: str +): app.static("/foo", static_file_directory) _, response = app.test_client.get("/foo/static/..\\static/test.file") From 9c8ca3628e8aeb308b5ae3c68eb82eda17e28ddf Mon Sep 17 00:00:00 2001 From: Zhiwei Date: Wed, 27 Jul 2022 09:42:14 -0500 Subject: [PATCH 13/14] Make it to be one loop for performance optimization --- sanic/mixins/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 8a5f10cc9c..6291747551 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -815,7 +815,7 @@ async def _static_request_handler( unquoted_file_uri = unquote(__file_uri__).lstrip("/") segments = unquoted_file_uri.split("/") - if ".." in segments or any(sep in segment for segment in segments): + if any(segment == ".." or sep in segment for segment in segments): raise BadRequest("Invalid URL") file_path = path.join(file_or_directory, unquoted_file_uri) From b57dca323456121b711bbcf8a864a2ee4633c512 Mon Sep 17 00:00:00 2001 From: Zhiwei Date: Thu, 28 Jul 2022 00:58:59 -0500 Subject: [PATCH 14/14] Performance optimization with benchmark results --- sanic/mixins/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 6291747551..8a5f10cc9c 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -815,7 +815,7 @@ async def _static_request_handler( unquoted_file_uri = unquote(__file_uri__).lstrip("/") segments = unquoted_file_uri.split("/") - if any(segment == ".." or sep in segment for segment in segments): + if ".." in segments or any(sep in segment for segment in segments): raise BadRequest("Invalid URL") file_path = path.join(file_or_directory, unquoted_file_uri)