From 04676b53ba3d5958199caff55c7f5b48e060d6b1 Mon Sep 17 00:00:00 2001 From: vainu-arto <70135394+vainu-arto@users.noreply.github.com> Date: Wed, 9 Nov 2022 22:40:10 +0200 Subject: [PATCH] [sftp] Improve write & memory performance when saving files (#1194) * Move seekability test to a separate helper function Having a third copy of this code would feel silly. * Stop calling content.open() from SFTPStorage._save The open method isn't available in all file-like objects one might want to save using the storage framework, so instead of relying on it calling seek(0) for us do it ourselves (conditionally). * Require at least version 1.10.0 of paramiko This version introduced the SFTPClient.putfo() method. It was released in early 2013, so this shouldn't be a great hardship. * Use SFTPClient.putfo() in SFTPStorage._save() Instead of using SFTPClient.open() to get a file-like object, reading the entire content into memory and then calling .write() on that. The measured performance difference is stunning, and the uploaded file data no longer needs to fit into memory. --- setup.cfg | 2 +- storages/backends/gcloud.py | 3 ++- storages/backends/s3boto3.py | 3 ++- storages/backends/sftpstorage.py | 8 ++++---- storages/utils.py | 4 ++++ tests/test_sftp.py | 12 +++++++++--- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/setup.cfg b/setup.cfg index 46bbdce4e..d01e99546 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,7 +46,7 @@ google = libcloud = apache-libcloud sftp = - paramiko + paramiko >= 1.10.0 [flake8] exclude = diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 7e2738416..837b657bf 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -15,6 +15,7 @@ from storages.utils import check_location from storages.utils import clean_name from storages.utils import get_available_overwrite_name +from storages.utils import is_seekable from storages.utils import safe_join from storages.utils import setting from storages.utils import to_bytes @@ -194,7 +195,7 @@ def _save(self, name, content): for prop, val in blob_params.items(): setattr(file_object.blob, prop, val) - rewind = not hasattr(content, 'seekable') or content.seekable() + rewind = is_seekable(content) file_object.blob.upload_from_file( content, rewind=rewind, diff --git a/storages/backends/s3boto3.py b/storages/backends/s3boto3.py index 9af0dbb3a..28ed7dd6e 100644 --- a/storages/backends/s3boto3.py +++ b/storages/backends/s3boto3.py @@ -24,6 +24,7 @@ from storages.compress import CompressStorageMixin from storages.utils import check_location from storages.utils import get_available_overwrite_name +from storages.utils import is_seekable from storages.utils import lookup_env from storages.utils import safe_join from storages.utils import setting @@ -445,7 +446,7 @@ def _save(self, name, content): name = self._normalize_name(cleaned_name) params = self._get_write_parameters(name, content) - if not hasattr(content, 'seekable') or content.seekable(): + if is_seekable(content): content.seek(0, os.SEEK_SET) if (self.gzip and params['ContentType'] in self.gzip_content_types and diff --git a/storages/backends/sftpstorage.py b/storages/backends/sftpstorage.py index 643685c88..34a5755c2 100644 --- a/storages/backends/sftpstorage.py +++ b/storages/backends/sftpstorage.py @@ -17,6 +17,7 @@ from django.utils.deconstruct import deconstructible from storages.base import BaseStorage +from storages.utils import is_seekable from storages.utils import setting @@ -123,15 +124,14 @@ def _mkdir(self, path): def _save(self, name, content): """Save file via SFTP.""" - content.open() + if is_seekable(content): + content.seek(0, os.SEEK_SET) path = self._remote_path(name) dirname = posixpath.dirname(path) if not self.exists(dirname): self._mkdir(dirname) - f = self.sftp.open(path, 'wb') - f.write(content.file.read()) - f.close() + self.sftp.putfo(content, path) # set file permissions if configured if self._file_mode is not None: diff --git a/storages/utils.py b/storages/utils.py index ce7b54af1..4a5d8f41c 100644 --- a/storages/utils.py +++ b/storages/utils.py @@ -125,3 +125,7 @@ def get_available_overwrite_name(name, max_length): 'allows sufficient "max_length".' % name ) return os.path.join(dir_name, "{}{}".format(file_root, file_ext)) + + +def is_seekable(file_object): + return not hasattr(file_object, 'seekable') or file_object.seekable() diff --git a/tests/test_sftp.py b/tests/test_sftp.py index a7fd99223..703a6b272 100644 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -12,6 +12,7 @@ from django.test import override_settings from storages.backends import sftpstorage +from tests.utils import NonSeekableContentFile class SFTPStorageTest(TestCase): @@ -69,7 +70,12 @@ def test_mkdir_parent(self, mock_sftp): @patch('storages.backends.sftpstorage.SFTPStorage.sftp') def test_save(self, mock_sftp): self.storage._save('foo', File(io.BytesIO(b'foo'), 'foo')) - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called) + + @patch('storages.backends.sftpstorage.SFTPStorage.sftp') + def test_save_non_seekable(self, mock_sftp): + self.storage._save('foo', NonSeekableContentFile('foo')) + self.assertTrue(mock_sftp.putfo.called) @patch('storages.backends.sftpstorage.SFTPStorage.sftp', **{ 'stat.side_effect': (FileNotFoundError(), True) @@ -77,7 +83,7 @@ def test_save(self, mock_sftp): def test_save_in_subdir(self, mock_sftp): self.storage._save('bar/foo', File(io.BytesIO(b'foo'), 'foo')) self.assertEqual(mock_sftp.mkdir.call_args_list[0][0], ('bar',)) - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called) @patch('storages.backends.sftpstorage.SFTPStorage.sftp') def test_delete(self, mock_sftp): @@ -212,4 +218,4 @@ def test_write(self): def test_close(self, mock_sftp): self.file.write(b'foo') self.file.close() - self.assertTrue(mock_sftp.open.return_value.write.called) + self.assertTrue(mock_sftp.putfo.called)