From 75c0063995e0ba47c2eae9fa2c5961690c4d857f Mon Sep 17 00:00:00 2001 From: Arto Jantunen Date: Wed, 9 Nov 2022 12:20:50 +0200 Subject: [PATCH 1/4] Move seekability test to a separate helper function Having a third copy of this code would feel silly. --- storages/backends/gcloud.py | 3 ++- storages/backends/s3boto3.py | 3 ++- storages/utils.py | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 7e2738416..837b657bf 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -15,6 +15,7 @@ from storages.utils import check_location from storages.utils import clean_name from storages.utils import get_available_overwrite_name +from storages.utils import is_seekable from storages.utils import safe_join from storages.utils import setting from storages.utils import to_bytes @@ -194,7 +195,7 @@ def _save(self, name, content): for prop, val in blob_params.items(): setattr(file_object.blob, prop, val) - rewind = not hasattr(content, 'seekable') or content.seekable() + rewind = is_seekable(content) file_object.blob.upload_from_file( content, rewind=rewind, diff --git a/storages/backends/s3boto3.py b/storages/backends/s3boto3.py index 9af0dbb3a..28ed7dd6e 100644 --- a/storages/backends/s3boto3.py +++ b/storages/backends/s3boto3.py @@ -24,6 +24,7 @@ from storages.compress import CompressStorageMixin from storages.utils import check_location from storages.utils import get_available_overwrite_name +from storages.utils import is_seekable from storages.utils import lookup_env from storages.utils import safe_join from storages.utils import setting @@ -445,7 +446,7 @@ def _save(self, name, content): name = self._normalize_name(cleaned_name) params = self._get_write_parameters(name, content) - if not hasattr(content, 'seekable') or content.seekable(): + if is_seekable(content): content.seek(0, os.SEEK_SET) if (self.gzip and params['ContentType'] in self.gzip_content_types and diff --git a/storages/utils.py b/storages/utils.py index ce7b54af1..4a5d8f41c 100644 --- a/storages/utils.py +++ b/storages/utils.py @@ -125,3 +125,7 @@ def get_available_overwrite_name(name, max_length): 'allows sufficient "max_length".' % name ) return os.path.join(dir_name, "{}{}".format(file_root, file_ext)) + + +def is_seekable(file_object): + return not hasattr(file_object, 'seekable') or file_object.seekable() From af38ba520e512b62abc5da9b84c4915914fd91c5 Mon Sep 17 00:00:00 2001 From: Arto Jantunen Date: Wed, 9 Nov 2022 12:21:11 +0200 Subject: [PATCH 2/4] Stop calling content.open() from SFTPStorage._save The open method isn't available in all file-like objects one might want to save using the storage framework, so instead of relying on it calling seek(0) for us do it ourselves (conditionally). --- storages/backends/sftpstorage.py | 4 +++- tests/test_sftp.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/storages/backends/sftpstorage.py b/storages/backends/sftpstorage.py index 643685c88..3ce5b031b 100644 --- a/storages/backends/sftpstorage.py +++ b/storages/backends/sftpstorage.py @@ -17,6 +17,7 @@ from django.utils.deconstruct import deconstructible from storages.base import BaseStorage +from storages.utils import is_seekable from storages.utils import setting @@ -123,7 +124,8 @@ def _mkdir(self, path): def _save(self, name, content): """Save file via SFTP.""" - content.open() + if is_seekable(content): + content.seek(0, os.SEEK_SET) path = self._remote_path(name) dirname = posixpath.dirname(path) if not self.exists(dirname): diff --git a/tests/test_sftp.py b/tests/test_sftp.py index a7fd99223..57b40cfd1 100644 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -12,6 +12,7 @@ from django.test import override_settings from storages.backends import sftpstorage +from tests.utils import NonSeekableContentFile class SFTPStorageTest(TestCase): @@ -71,6 +72,11 @@ def test_save(self, mock_sftp): self.storage._save('foo', File(io.BytesIO(b'foo'), 'foo')) self.assertTrue(mock_sftp.open.return_value.write.called) + @patch('storages.backends.sftpstorage.SFTPStorage.sftp') + def test_save_non_seekable(self, mock_sftp): + self.storage._save('foo', NonSeekableContentFile('foo')) + self.assertTrue(mock_sftp.open.return_value.write.called) + @patch('storages.backends.sftpstorage.SFTPStorage.sftp', **{ 'stat.side_effect': (FileNotFoundError(), True) }) From 094240f32a997f486d5fc3fd9e720c14f5053bad Mon Sep 17 00:00:00 2001 From: Arto Jantunen Date: Wed, 9 Nov 2022 13:30:55 +0200 Subject: [PATCH 3/4] Require at least version 1.10.0 of paramiko This version introduced the SFTPClient.putfo() method. It was released in early 2013, so this shouldn't be a great hardship. --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 46bbdce4e..d01e99546 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,7 +46,7 @@ google = libcloud = apache-libcloud sftp = - paramiko + paramiko >= 1.10.0 [flake8] exclude = From a348addca9f0c2121ebecd2f8d096c9e52354f86 Mon Sep 17 00:00:00 2001 From: Arto Jantunen Date: Wed, 9 Nov 2022 13:31:53 +0200 Subject: [PATCH 4/4] Use SFTPClient.putfo() in SFTPStorage._save() Instead of using SFTPClient.open() to get a file-like object, reading the entire content into memory and then calling .write() on that. The measured performance difference is stunning, and the uploaded file data no longer needs to fit into memory. --- storages/backends/sftpstorage.py | 4 +--- tests/test_sftp.py | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/storages/backends/sftpstorage.py b/storages/backends/sftpstorage.py index 3ce5b031b..34a5755c2 100644 --- a/storages/backends/sftpstorage.py +++ b/storages/backends/sftpstorage.py @@ -131,9 +131,7 @@ def _save(self, name, content): if not self.exists(dirname): self._mkdir(dirname) - f = self.sftp.open(path, 'wb') - f.write(content.file.read()) - f.close() + self.sftp.putfo(content, path) # set file permissions if configured if self._file_mode is not None: diff --git a/tests/test_sftp.py b/tests/test_sftp.py index 57b40cfd1..703a6b272 100644 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -70,12 +70,12 @@ def test_mkdir_parent(self, mock_sftp): @patch('storages.backends.sftpstorage.SFTPStorage.sftp') def test_save(self, mock_sftp): self.storage._save('foo', File(io.BytesIO(b'foo'), 'foo')) - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called) @patch('storages.backends.sftpstorage.SFTPStorage.sftp') def test_save_non_seekable(self, mock_sftp): self.storage._save('foo', NonSeekableContentFile('foo')) - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called) @patch('storages.backends.sftpstorage.SFTPStorage.sftp', **{ 'stat.side_effect': (FileNotFoundError(), True) @@ -83,7 +83,7 @@ def test_save_non_seekable(self, mock_sftp): def test_save_in_subdir(self, mock_sftp): self.storage._save('bar/foo', File(io.BytesIO(b'foo'), 'foo')) self.assertEqual(mock_sftp.mkdir.call_args_list[0][0], ('bar',)) - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called) @patch('storages.backends.sftpstorage.SFTPStorage.sftp') def test_delete(self, mock_sftp): @@ -218,4 +218,4 @@ def test_write(self): def test_close(self, mock_sftp): self.file.write(b'foo') self.file.close() - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called)