Skip to content

Commit

Permalink
[s3] Fix newline handling for text-mode files
Browse files Browse the repository at this point in the history
  • Loading branch information
jschneier committed May 10, 2024
1 parent 9fca46c commit 3b06da2
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 24 deletions.
17 changes: 14 additions & 3 deletions storages/backends/s3.py
@@ -1,3 +1,4 @@
import io
import mimetypes
import os
import posixpath
Expand Down Expand Up @@ -123,7 +124,6 @@ def __init__(self, name, mode, storage, buffer_size=None):
self._storage = storage
self.name = name[len(self._storage.location) :].lstrip("/")
self._mode = mode
self._force_mode = (lambda b: b) if "b" in mode else (lambda b: b.decode())
self.obj = storage.bucket.Object(name)
if "w" not in mode:
# Force early RAII-style exception if object does not exist
Expand Down Expand Up @@ -184,6 +184,17 @@ def _get_file(self):
self._file.seek(0)
if self._storage.gzip and self.obj.content_encoding == "gzip":
self._file = self._decompress_file(mode=self._mode, file=self._file)
elif "b" not in self._mode:
if hasattr(self._file, "readable"):
# For versions > Python 3.10 compatibility
# See SpooledTemporaryFile changes in 3.11 (https://docs.python.org/3/library/tempfile.html)
# Now fully implements the io.BufferedIOBase and io.TextIOBase abstract base classes allowing the file
# to be readable in the mode that it was specified (without accessing the underlying _file object).
# In this case, we need to wrap the file in a TextIOWrapper to ensure that the file is read as a text file.
self._file = io.TextIOWrapper(self._file, encoding="utf-8")
else:
# For versions <= Python 3.10 compatibility
self._file = io.TextIOWrapper(self._file._file, encoding="utf-8")
self._closed = False
return self._file

Expand All @@ -195,12 +206,12 @@ def _set_file(self, value):
def read(self, *args, **kwargs):
if "r" not in self._mode:
raise AttributeError("File was not opened in read mode.")
return self._force_mode(super().read(*args, **kwargs))
return super().read(*args, **kwargs)

def readline(self, *args, **kwargs):
if "r" not in self._mode:
raise AttributeError("File was not opened in read mode.")
return self._force_mode(super().readline(*args, **kwargs))
return super().readline(*args, **kwargs)

def readlines(self):
return list(self)
Expand Down
60 changes: 39 additions & 21 deletions tests/test_s3.py
Expand Up @@ -317,23 +317,6 @@ def test_storage_open_read_string(self):
content_str = file.read()
self.assertEqual(content_str, "")

def test_storage_open_readlines(self):
"""
Test readlines with file opened in "r" and "rb" modes
"""
name = "test_open_readlines_string.txt"
with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2")
with self.storage.open(name, "r") as f1:
f1._file = temp_file
content_lines = f1.readlines()
self.assertEqual(content_lines, ["line1\n", "line2"])
temp_file.seek(0)
with self.storage.open(name, "rb") as f2:
f2._file = temp_file
content_lines = f2.readlines()
self.assertEqual(content_lines, [b"line1\n", b"line2"])

def test_storage_open_write(self):
"""
Test opening a file in write mode
Expand All @@ -348,7 +331,7 @@ def test_storage_open_write(self):
"ACL": "public-read",
}

with self.storage.open(name, "w") as file:
with self.storage.open(name, "wb") as file:
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
# Set the name of the mock object
Expand Down Expand Up @@ -398,7 +381,7 @@ def test_storage_open_no_write(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w"):
with self.storage.open(name, "wb"):
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
obj.load.side_effect = ClientError(
Expand Down Expand Up @@ -429,7 +412,7 @@ def test_storage_open_no_overwrite_existing(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w"):
with self.storage.open(name, "wb"):
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value

Expand All @@ -451,7 +434,7 @@ def test_storage_write_beyond_buffer_size(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w") as file:
with self.storage.open(name, "wb") as file:
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
# Set the name of the mock object
Expand Down Expand Up @@ -1167,6 +1150,41 @@ def test_content_type_not_detectable(self):
s3.S3Storage.default_content_type,
)

def test_storage_open_reading_with_newlines(self):
"""Test file reading with "r" and "rb" and various newline characters."""
name = "test_storage_open_read_with_newlines.txt"
with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext\n")
self.storage.save(name, temp_file)
file = self.storage.open(name, "r")
content_str = file.read()
file.close()
self.assertEqual(content_str, "line1\nline2\nmore\ntext\n")

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext\n")
self.storage.save(name, temp_file)
file = self.storage.open(name, "rb")
content_str = file.read()
file.close()
self.assertEqual(content_str, b"line1\nline2\r\nmore\rtext\n")

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext")
self.storage.save(name, temp_file)
file = self.storage.open(name, "r")
content_lines = file.readlines()
file.close()
self.assertEqual(content_lines, ["line1\n", "line2\n", "more\n", "text"])

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext")
self.storage.save(name, temp_file)
file = self.storage.open(name, "rb")
content_lines = file.readlines()
file.close()
self.assertEqual(content_lines, [b"line1\n", b"line2\r\n", b"more\r", b"text"])


class TestBackwardsNames(TestCase):
def test_importing(self):
Expand Down

0 comments on commit 3b06da2

Please sign in to comment.