From 394563576fd62fa4d529e340e8cfc269434fcf6f Mon Sep 17 00:00:00 2001 From: rafalp Date: Tue, 11 Jan 2022 21:52:33 +0100 Subject: [PATCH 1/5] Add size attribute to UploadFile --- docs/requests.md | 1 + starlette/datastructures.py | 4 ++++ tests/test_datastructures.py | 8 +++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/requests.md b/docs/requests.md index 747e496d1..11fb58343 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -123,6 +123,7 @@ multidict, containing both file uploads and text input. File upload items are re * `content_type`: A `str` with the content type (MIME type / media type) (e.g. `image/jpeg`). * `file`: A `SpooledTemporaryFile` (a file-like object). This is the actual Python file that you can pass directly to other functions or libraries that expect a "file-like" object. * `headers`: A `Headers` object. Often this will only be the `Content-Type` header, but if additional headers were included in the multipart field they will be included here. Note that these headers have no relationship with the headers in `Request.headers`. +* `size`: An `int` with file's size in bytes. `UploadFile` has the following `async` methods. They all call the corresponding file methods underneath (using the internal `SpooledTemporaryFile`). 
diff --git a/starlette/datastructures.py b/starlette/datastructures.py index eee3834e0..00c9810e4 100644 --- a/starlette/datastructures.py +++ b/starlette/datastructures.py @@ -430,12 +430,14 @@ class UploadFile: def __init__( self, file: typing.BinaryIO, + size: int, *, filename: typing.Optional[str] = None, headers: "typing.Optional[Headers]" = None, ) -> None: self.filename = filename self.file = file + self.size = size self.headers = headers or Headers() @property @@ -449,6 +451,8 @@ def _in_memory(self) -> bool: return not rolled_to_disk async def write(self, data: bytes) -> None: + self.size += len(data) + if self._in_memory: self.file.write(data) else: diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py index 16f9da4a5..0271cfd08 100644 --- a/tests/test_datastructures.py +++ b/tests/test_datastructures.py @@ -275,10 +275,12 @@ def test_queryparams(): async def test_upload_file_file_input(): """Test passing file/stream into the UploadFile constructor""" stream = io.BytesIO(b"data") - file = UploadFile(filename="file", file=stream) + file = UploadFile(filename="file", file=stream, size=len(stream)) + assert file.size == 4 assert await file.read() == b"data" await file.write(b" and more data!") assert await file.read() == b"" + assert file.size == 19 await file.seek(0) assert await file.read() == b"data and more data!" 
@@ -292,7 +294,7 @@ async def test_uploadfile_rolling(max_size: int) -> None: stream: BinaryIO = SpooledTemporaryFile( # type: ignore[assignment] max_size=max_size ) - file = UploadFile(filename="file", file=stream) + file = UploadFile(filename="file", file=stream, size=len(stream)) assert await file.read() == b"" await file.write(b"data") assert await file.read() == b"" @@ -307,7 +309,7 @@ async def test_uploadfile_rolling(max_size: int) -> None: def test_formdata(): stream = io.BytesIO(b"data") - upload = UploadFile(filename="file", file=stream) + upload = UploadFile(filename="file", file=stream, size=len(stream)) form = FormData([("a", "123"), ("a", "456"), ("b", upload)]) assert "a" in form assert "A" not in form From 3deaed6cddb76e06580d15ca19bbf3495012b68b Mon Sep 17 00:00:00 2001 From: rafalp Date: Sat, 4 Feb 2023 23:21:57 +0100 Subject: [PATCH 2/5] Initialize UploadFile with size=0 in multipart request parser --- starlette/formparsers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/starlette/formparsers.py b/starlette/formparsers.py index 739befae8..eb76c6f10 100644 --- a/starlette/formparsers.py +++ b/starlette/formparsers.py @@ -231,6 +231,7 @@ async def parse(self) -> FormData: tempfile = SpooledTemporaryFile(max_size=self.max_file_size) file = UploadFile( file=tempfile, # type: ignore[arg-type] + size=0, filename=filename, headers=Headers(raw=item_headers), ) From f9239946500166af71975883f2b5c48d6429aca5 Mon Sep 17 00:00:00 2001 From: rafalp Date: Sat, 4 Feb 2023 23:33:10 +0100 Subject: [PATCH 3/5] Fix uploads tests --- tests/test_datastructures.py | 12 ++++++++---- tests/test_formparsers.py | 13 +++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py index 0271cfd08..6e6e9226b 100644 --- a/tests/test_datastructures.py +++ b/tests/test_datastructures.py @@ -275,9 +275,9 @@ def test_queryparams(): async def test_upload_file_file_input(): """Test passing file/stream into 
the UploadFile constructor""" stream = io.BytesIO(b"data") - file = UploadFile(filename="file", file=stream, size=len(stream)) - assert file.size == 4 + file = UploadFile(filename="file", file=stream, size=4) assert await file.read() == b"data" + assert file.size == 4 await file.write(b" and more data!") assert await file.read() == b"" assert file.size == 19 @@ -294,22 +294,26 @@ async def test_uploadfile_rolling(max_size: int) -> None: stream: BinaryIO = SpooledTemporaryFile( # type: ignore[assignment] max_size=max_size ) - file = UploadFile(filename="file", file=stream, size=len(stream)) + file = UploadFile(filename="file", file=stream, size=0) assert await file.read() == b"" + assert file.size == 0 await file.write(b"data") assert await file.read() == b"" + assert file.size == 4 await file.seek(0) assert await file.read() == b"data" await file.write(b" more") assert await file.read() == b"" + assert file.size == 9 await file.seek(0) assert await file.read() == b"data more" + assert file.size == 9 await file.close() def test_formdata(): stream = io.BytesIO(b"data") - upload = UploadFile(filename="file", file=stream, size=len(stream)) + upload = UploadFile(filename="file", file=stream, size=4) form = FormData([("a", "123"), ("a", "456"), ("b", upload)]) assert "a" in form assert "A" not in form diff --git a/tests/test_formparsers.py b/tests/test_formparsers.py index 4792424ab..804ce8d26 100644 --- a/tests/test_formparsers.py +++ b/tests/test_formparsers.py @@ -29,6 +29,7 @@ async def app(scope, receive, send): content = await value.read() output[key] = { "filename": value.filename, + "size": value.size, "content": content.decode(), "content_type": value.content_type, } @@ -51,6 +52,7 @@ async def multi_items_app(scope, receive, send): output[key].append( { "filename": value.filename, + "size": value.size, "content": content.decode(), "content_type": value.content_type, } @@ -71,6 +73,7 @@ async def app_with_headers(scope, receive, send): content = await 
value.read() output[key] = { "filename": value.filename, + "size": value.size, "content": content.decode(), "content_type": value.content_type, "headers": list(value.headers.items()), @@ -112,6 +115,7 @@ def test_multipart_request_files(tmpdir, test_client_factory): assert response.json() == { "test": { "filename": "test.txt", + "size": 14, "content": "", "content_type": "text/plain", } @@ -129,6 +133,7 @@ def test_multipart_request_files_with_content_type(tmpdir, test_client_factory): assert response.json() == { "test": { "filename": "test.txt", + "size": 14, "content": "", "content_type": "text/plain", } @@ -152,11 +157,13 @@ def test_multipart_request_multiple_files(tmpdir, test_client_factory): assert response.json() == { "test1": { "filename": "test1.txt", + "size": 15, "content": "", "content_type": "text/plain", }, "test2": { "filename": "test2.txt", + "size": 15, "content": "", "content_type": "text/plain", }, @@ -185,6 +192,7 @@ def test_multipart_request_multiple_files_with_headers(tmpdir, test_client_facto "test1": "", "test2": { "filename": "test2.txt", + "size": 15, "content": "", "content_type": "text/plain", "headers": [ @@ -220,11 +228,13 @@ def test_multi_items(tmpdir, test_client_factory): "abc", { "filename": "test1.txt", + "size": 15, "content": "", "content_type": "text/plain", }, { "filename": "test2.txt", + "size": 15, "content": "", "content_type": "text/plain", }, @@ -261,6 +271,7 @@ def test_multipart_request_mixed_files_and_data(tmpdir, test_client_factory): assert response.json() == { "file": { "filename": "file.txt", + "size": 14, "content": "", "content_type": "text/plain", }, @@ -291,6 +302,7 @@ def test_multipart_request_with_charset_for_filename(tmpdir, test_client_factory assert response.json() == { "file": { "filename": "文書.txt", + "size": 14, "content": "", "content_type": "text/plain", } @@ -318,6 +330,7 @@ def test_multipart_request_without_charset_for_filename(tmpdir, test_client_fact assert response.json() == { "file": { 
"filename": "画像.jpg", + "size": 14, "content": "", "content_type": "image/jpeg", } From 1d3a627b7dd87936141327a4e2ba43f0fa5eba8e Mon Sep 17 00:00:00 2001 From: rafalp Date: Sat, 4 Feb 2023 23:44:49 +0100 Subject: [PATCH 4/5] Improve UploadFile.size docs --- docs/requests.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requests.md b/docs/requests.md index 11fb58343..65b81d732 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -123,7 +123,7 @@ multidict, containing both file uploads and text input. File upload items are re * `content_type`: A `str` with the content type (MIME type / media type) (e.g. `image/jpeg`). * `file`: A `SpooledTemporaryFile` (a file-like object). This is the actual Python file that you can pass directly to other functions or libraries that expect a "file-like" object. * `headers`: A `Headers` object. Often this will only be the `Content-Type` header, but if additional headers were included in the multipart field they will be included here. Note that these headers have no relationship with the headers in `Request.headers`. -* `size`: An `int` with file's size in bytes. +* `size`: An `int` with uploaded file's size in bytes. This value is calculated from request's contents, making it better choice to find uploaded file's size than `Content-Length` header. `UploadFile` has the following `async` methods. They all call the corresponding file methods underneath (using the internal `SpooledTemporaryFile`). 
From 1993b8c0ffb2b16de202bd32d42adcd3c73574f5 Mon Sep 17 00:00:00 2001 From: rafalp Date: Sun, 5 Feb 2023 02:16:16 +0100 Subject: [PATCH 5/5] Make size on UploadFile optional --- docs/requests.md | 2 +- starlette/datastructures.py | 5 +++-- tests/test_datastructures.py | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/requests.md b/docs/requests.md index 65b81d732..b512bd1fc 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -123,7 +123,7 @@ multidict, containing both file uploads and text input. File upload items are re * `content_type`: A `str` with the content type (MIME type / media type) (e.g. `image/jpeg`). * `file`: A `SpooledTemporaryFile` (a file-like object). This is the actual Python file that you can pass directly to other functions or libraries that expect a "file-like" object. * `headers`: A `Headers` object. Often this will only be the `Content-Type` header, but if additional headers were included in the multipart field they will be included here. Note that these headers have no relationship with the headers in `Request.headers`. -* `size`: An `int` with uploaded file's size in bytes. This value is calculated from request's contents, making it better choice to find uploaded file's size than `Content-Length` header. +* `size`: An `int` with the uploaded file's size in bytes. This value is calculated from the request's contents, making it a better choice for finding the uploaded file's size than the `Content-Length` header. `None` if not set. `UploadFile` has the following `async` methods. They all call the corresponding file methods underneath (using the internal `SpooledTemporaryFile`). 
diff --git a/starlette/datastructures.py b/starlette/datastructures.py index 00c9810e4..dd314a4e6 100644 --- a/starlette/datastructures.py +++ b/starlette/datastructures.py @@ -430,8 +430,8 @@ class UploadFile: def __init__( self, file: typing.BinaryIO, - size: int, *, + size: typing.Optional[int] = None, filename: typing.Optional[str] = None, headers: "typing.Optional[Headers]" = None, ) -> None: @@ -451,7 +451,8 @@ def _in_memory(self) -> bool: return not rolled_to_disk async def write(self, data: bytes) -> None: - self.size += len(data) + if self.size is not None: + self.size += len(data) if self._in_memory: self.file.write(data) diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py index 6e6e9226b..6cf58e7b0 100644 --- a/tests/test_datastructures.py +++ b/tests/test_datastructures.py @@ -285,6 +285,20 @@ async def test_upload_file_file_input(): assert await file.read() == b"data and more data!" +@pytest.mark.anyio +async def test_upload_file_without_size(): + """Test passing file/stream into the UploadFile constructor without size""" + stream = io.BytesIO(b"data") + file = UploadFile(filename="file", file=stream) + assert await file.read() == b"data" + assert file.size is None + await file.write(b" and more data!") + assert await file.read() == b"" + assert file.size is None + await file.seek(0) + assert await file.read() == b"data and more data!" + + @pytest.mark.anyio @pytest.mark.parametrize("max_size", [1, 1024], ids=["rolled", "unrolled"]) async def test_uploadfile_rolling(max_size: int) -> None: