Skip to content

Commit

Permalink
[sftp] Improve write & memory performance when saving files (#1194)
Browse files Browse the repository at this point in the history
* Move seekability test to a separate helper function

Having a third copy of this code would feel silly.

* Stop calling content.open() from SFTPStorage._save

The open() method isn't available on all file-like objects one might want to
save using the storage framework, so instead of relying on it to call
seek(0) for us, do it ourselves (only when the content is seekable).

* Require at least version 1.10.0 of paramiko

This version introduced the SFTPClient.putfo() method. It was released in
early 2013, so this shouldn't be a great hardship.

* Use SFTPClient.putfo() in SFTPStorage._save()

This replaces using SFTPClient.open() to get a file-like object, reading the
entire content into memory, and then calling .write() on it.

The measured performance difference is stunning, and the uploaded file data
no longer needs to fit into memory.
  • Loading branch information
vainu-arto committed Nov 9, 2022
1 parent b04de35 commit 04676b5
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 10 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -46,7 +46,7 @@ google =
libcloud =
apache-libcloud
sftp =
paramiko
paramiko >= 1.10.0

[flake8]
exclude =
Expand Down
3 changes: 2 additions & 1 deletion storages/backends/gcloud.py
Expand Up @@ -15,6 +15,7 @@
from storages.utils import check_location
from storages.utils import clean_name
from storages.utils import get_available_overwrite_name
from storages.utils import is_seekable
from storages.utils import safe_join
from storages.utils import setting
from storages.utils import to_bytes
Expand Down Expand Up @@ -194,7 +195,7 @@ def _save(self, name, content):
for prop, val in blob_params.items():
setattr(file_object.blob, prop, val)

rewind = not hasattr(content, 'seekable') or content.seekable()
rewind = is_seekable(content)
file_object.blob.upload_from_file(
content,
rewind=rewind,
Expand Down
3 changes: 2 additions & 1 deletion storages/backends/s3boto3.py
Expand Up @@ -24,6 +24,7 @@
from storages.compress import CompressStorageMixin
from storages.utils import check_location
from storages.utils import get_available_overwrite_name
from storages.utils import is_seekable
from storages.utils import lookup_env
from storages.utils import safe_join
from storages.utils import setting
Expand Down Expand Up @@ -445,7 +446,7 @@ def _save(self, name, content):
name = self._normalize_name(cleaned_name)
params = self._get_write_parameters(name, content)

if not hasattr(content, 'seekable') or content.seekable():
if is_seekable(content):
content.seek(0, os.SEEK_SET)
if (self.gzip and
params['ContentType'] in self.gzip_content_types and
Expand Down
8 changes: 4 additions & 4 deletions storages/backends/sftpstorage.py
Expand Up @@ -17,6 +17,7 @@
from django.utils.deconstruct import deconstructible

from storages.base import BaseStorage
from storages.utils import is_seekable
from storages.utils import setting


Expand Down Expand Up @@ -123,15 +124,14 @@ def _mkdir(self, path):

def _save(self, name, content):
"""Save file via SFTP."""
content.open()
if is_seekable(content):
content.seek(0, os.SEEK_SET)
path = self._remote_path(name)
dirname = posixpath.dirname(path)
if not self.exists(dirname):
self._mkdir(dirname)

f = self.sftp.open(path, 'wb')
f.write(content.file.read())
f.close()
self.sftp.putfo(content, path)

# set file permissions if configured
if self._file_mode is not None:
Expand Down
4 changes: 4 additions & 0 deletions storages/utils.py
Expand Up @@ -125,3 +125,7 @@ def get_available_overwrite_name(name, max_length):
'allows sufficient "max_length".' % name
)
return os.path.join(dir_name, "{}{}".format(file_root, file_ext))


def is_seekable(file_object):
    """Return whether ``file_object`` should be treated as seekable.

    Objects that do not expose a ``seekable`` attribute are assumed to be
    seekable; otherwise the result of ``file_object.seekable()`` is returned.
    """
    return not hasattr(file_object, 'seekable') or file_object.seekable()
12 changes: 9 additions & 3 deletions tests/test_sftp.py
Expand Up @@ -12,6 +12,7 @@
from django.test import override_settings

from storages.backends import sftpstorage
from tests.utils import NonSeekableContentFile


class SFTPStorageTest(TestCase):
Expand Down Expand Up @@ -69,15 +70,20 @@ def test_mkdir_parent(self, mock_sftp):
@patch('storages.backends.sftpstorage.SFTPStorage.sftp')
def test_save(self, mock_sftp):
self.storage._save('foo', File(io.BytesIO(b'foo'), 'foo'))
self.assertTrue(mock_sftp.open.return_value.write.called)
self.assertTrue(mock_sftp.putfo.called)

@patch('storages.backends.sftpstorage.SFTPStorage.sftp')
def test_save_non_seekable(self, mock_sftp):
self.storage._save('foo', NonSeekableContentFile('foo'))
self.assertTrue(mock_sftp.putfo.called)

@patch('storages.backends.sftpstorage.SFTPStorage.sftp', **{
'stat.side_effect': (FileNotFoundError(), True)
})
def test_save_in_subdir(self, mock_sftp):
self.storage._save('bar/foo', File(io.BytesIO(b'foo'), 'foo'))
self.assertEqual(mock_sftp.mkdir.call_args_list[0][0], ('bar',))
self.assertTrue(mock_sftp.open.return_value.write.called)
self.assertTrue(mock_sftp.putfo.called)

@patch('storages.backends.sftpstorage.SFTPStorage.sftp')
def test_delete(self, mock_sftp):
Expand Down Expand Up @@ -212,4 +218,4 @@ def test_write(self):
def test_close(self, mock_sftp):
self.file.write(b'foo')
self.file.close()
self.assertTrue(mock_sftp.open.return_value.write.called)
self.assertTrue(mock_sftp.putfo.called)

0 comments on commit 04676b5

Please sign in to comment.