From b1ec5b4e777345eba0236195bf3c42356b1f394c Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 30 Sep 2022 14:14:43 +0100 Subject: [PATCH 1/2] Streaming multipart support --- httpx/_multipart.py | 46 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index 0329649758..bef415d468 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -129,19 +129,18 @@ def __init__(self, name: str, value: FileTypes) -> None: self.file = fileobj self.headers = headers - def get_length(self) -> int: + def get_length(self) -> typing.Optional[int]: headers = self.render_headers() if isinstance(self.file, (str, bytes)): return len(headers) + len(to_bytes(self.file)) - # Let's do our best not to read `file` into memory. file_length = peek_filelike_length(self.file) + + # If we can't determine the filesize without reading it into memory, + # then return `None` here, to indicate an unknown file length. if file_length is None: - # As a last resort, read file and cache contents for later. - assert not hasattr(self, "_data") - self._data = to_bytes(self.file.read()) - file_length = len(self._data) + return None return len(headers) + file_length @@ -167,11 +166,6 @@ def render_data(self) -> typing.Iterator[bytes]: yield to_bytes(self.file) return - if hasattr(self, "_data"): - # Already rendered. - yield self._data - return - if hasattr(self.file, "seek"): self.file.seek(0) @@ -226,24 +220,34 @@ def iter_chunks(self) -> typing.Iterator[bytes]: yield b"\r\n" yield b"--%s--\r\n" % self.boundary - def iter_chunks_lengths(self) -> typing.Iterator[int]: + def get_content_length(self) -> typing.Optional[int]: + """ + Return the length of the multipart encoded content, or `None` if + any of the files have a length that cannot be determined upfront. + """ boundary_length = len(self.boundary) - # Follow closely what `.iter_chunks()` does. 
+ length = 0 + for field in self.fields: - yield 2 + boundary_length + 2 - yield field.get_length() - yield 2 - yield 2 + boundary_length + 4 + field_length = field.get_length() + if field_length is None: + return None + + length += 2 + boundary_length + 2 # b"--{boundary}\r\n" + length += field_length + length += 2 # b"\r\n" - def get_content_length(self) -> int: - return sum(self.iter_chunks_lengths()) + length += 2 + boundary_length + 4 # b"--{boundary}--\r\n" + return length # Content stream interface. def get_headers(self) -> typing.Dict[str, str]: - content_length = str(self.get_content_length()) + content_length = self.get_content_length() content_type = self.content_type - return {"Content-Length": content_length, "Content-Type": content_type} + if content_length is None: + return {"Transfer-Encoding": "chunked", "Content-Type": content_type} + return {"Content-Length": str(content_length), "Content-Type": content_type} def __iter__(self) -> typing.Iterator[bytes]: for chunk in self.iter_chunks(): From 3c828d11f20319eaaac6102a0f3518342f3a2fff Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 30 Sep 2022 14:18:47 +0100 Subject: [PATCH 2/2] Update tests for streaming multipart --- httpx/_multipart.py | 5 ++++- tests/test_multipart.py | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index bef415d468..86743d86d9 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -167,7 +167,10 @@ def render_data(self) -> typing.Iterator[bytes]: return if hasattr(self.file, "seek"): - self.file.seek(0) + try: + self.file.seek(0) + except io.UnsupportedOperation: + pass chunk = self.file.read(self.CHUNK_SIZE) while chunk: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index dc93d26505..6e92e552ed 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -357,8 +357,9 @@ def test_multipart_encode_files_raises_exception_with_StringIO_content() -> None def
test_multipart_encode_non_seekable_filelike() -> None: """ - Test that special readable but non-seekable filelike objects are supported, - at the cost of reading them into memory at most once. + Test that special readable but non-seekable filelike objects are supported. + In this case uploads will use 'Transfer-Encoding: chunked', instead of + a 'Content-Length' header. """ class IteratorIO(io.IOBase): @@ -387,7 +388,7 @@ def data() -> typing.Iterator[bytes]: ) assert headers == { "Content-Type": "multipart/form-data; boundary=+++", - "Content-Length": str(len(content)), + "Transfer-Encoding": "chunked", } assert content == b"".join(stream)