Add S3 object copy features (#23)
* tests: Add unit tests for copy features

* feat: Add S3 copy features
- Copy object
- Copy keys
- Copy prefix

* docs: Update documentation
* chore: Add Python 3.10 to validation tests
   - actions/setup-python#175

* Update package versions (#24)
- From Dependabot alerts

* chore: Bump minor version
FerrariDG committed Jun 7, 2022
1 parent f434ed3 commit bdeffa7
Showing 9 changed files with 489 additions and 142 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -20,7 +20,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.7
+          python-version: '3.7'
 
       - name: Install Poetry
         run: |
@@ -44,7 +44,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9]
+        python-version: ['3.8', '3.9', '3.10']
 
     steps:
       - name: Checkout code
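The quoting matters here: YAML parses an unquoted `3.10` as the float `3.1`, which is the problem tracked in actions/setup-python#175 (referenced in the commit message). A quick Python illustration, assuming PyYAML is installed (it is not a dependency of this repository):

```python
import yaml  # PyYAML; assumed available only for this illustration

# Unquoted, YAML reads 3.10 as a float, and it collapses to 3.1
print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}

# Quoted, the version survives as the intended string
print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'}
```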
2 changes: 1 addition & 1 deletion README.md
@@ -13,7 +13,7 @@ AWS S3 Tools is a Python package to make it easier to interact with S3 objects,
 - Check if an S3 object exists
 - Download/upload S3 objects to/from local files
 - Read/write S3 objects into/from Python variables
-- Delete/Move S3 objects
+- Delete/move/copy S3 objects
 
 The AWS S3 authentication is handled by the boto3 package, via environment variables, the AWS config file, or parameters.
 All S3 object functions in this package can set AWS Session authentication by passing a dictionary to the `aws_auth` parameter, following the schema below (not all fields are required).
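As an illustration (not part of this diff): since the package expands `aws_auth` straight into a `boto3.session.Session`, the keys of that dictionary are the standard `Session` keyword arguments. A hedged sketch, with placeholder values:

```python
# Hypothetical values; pass only the fields your setup needs
aws_auth = {
    'region_name': 'us-east-1',
    'aws_access_key_id': '<access-key>',
    'aws_secret_access_key': '<secret-key>',
    'aws_session_token': '<session-token>',  # only for temporary credentials
    'profile_name': '<profile>',             # alternative to explicit keys
}
```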
8 changes: 8 additions & 0 deletions docs/source/objects.rst
@@ -11,6 +11,14 @@ Check
    :undoc-members:
    :show-inheritance:
 
+Copy
+-----
+
+.. automodule:: s3_tools.objects.copy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Delete
 ------
 
4 changes: 2 additions & 2 deletions docs/source/requirements.txt
@@ -1,3 +1,3 @@
-boto3 == 1.17.8
-ujson == 5.1.0
+boto3 == 1.24.2
+ujson == 5.3.0
 toml == 0.10.2
268 changes: 134 additions & 134 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "aws-s3-tools"
-version = "0.1.1"
+version = "0.2.0"
 description = "AWS S3 tools package"
 authors = ["Daniel Ferrari"]
 maintainers = ["Daniel Ferrari, Carlos Alves, Tomás Osório"]
@@ -34,8 +34,8 @@ classifiers = [
 [tool.poetry.dependencies]
 python = "^3.7"
 
-boto3 = "^1.21.12"
-ujson = "^5.1.0"
+boto3 = "^1.24.2"
+ujson = "^5.3.0"
 rich = {version = "^11.2.0", optional = true}
 types-ujson = "^4.2.1"
6 changes: 6 additions & 0 deletions s3_tools/__init__.py
@@ -19,6 +19,12 @@
     object_exists,
 )
 
+from s3_tools.objects.copy import (
+    copy_object,
+    copy_keys,
+    copy_prefix,
+)
+
 from s3_tools.objects.delete import (
     delete_keys,
     delete_object,
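With these re-exports, the new functions are importable from the package root. A small usage sketch (bucket and key names are hypothetical):

```python
from s3_tools import copy_object

# Copy one object between hypothetical buckets
copy_object(
    source_bucket='my-bucket',
    source_key='myFiles/song.mp3',
    destination_bucket='my-backup-bucket',
    destination_key='myFiles/song.mp3',
)
```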
197 changes: 197 additions & 0 deletions s3_tools/objects/copy.py
@@ -0,0 +1,197 @@
"""Copy S3 objects."""
from concurrent import futures
from typing import (
Dict,
List,
Optional,
Tuple,
)

import boto3

from s3_tools.objects.list import list_objects


def copy_object(
source_bucket: str,
source_key: str,
destination_bucket: str,
destination_key: str,
aws_auth: Dict[str, str] = {}
) -> None:
"""Copy S3 object from source bucket and key to destination.
Parameters
----------
source_bucket : str
S3 bucket where the object is stored.
source_key : str
S3 key where the object is referenced.
destination_bucket : str
S3 destination bucket.
destination_key : str
S3 destination key.
aws_auth: Dict[str, str]
Contains AWS credentials, by default is empty.
Examples
--------
>>> copy_object(
... source_bucket='bucket',
... source_key='myFiles/song.mp3',
... destination_bucket='bucket',
... destination_key='myMusic/song.mp3'
... )
"""
session = boto3.session.Session(**aws_auth)
s3 = session.resource("s3")

s3.meta.client.copy(
{'Bucket': source_bucket, 'Key': source_key},
destination_bucket,
destination_key
)


def copy_keys(
source_bucket: str,
source_keys: List[str],
destination_bucket: str,
destination_keys: List[str],
threads: int = 5,
aws_auth: Dict[str, str] = {}
) -> None:
"""Copy a list of S3 objects from source bucket to destination.
Parameters
----------
source_bucket : str
S3 bucket where the objects are stored.
source_keys : List[str]
S3 keys where the objects are referenced.
destination_bucket : str
S3 destination bucket.
destination_keys : List[str]
S3 destination keys.
threads : int, optional
Number of parallel uploads, by default 5.
aws_auth: Dict[str, str]
Contains AWS credentials, by default is empty.
Raises
------
IndexError
When the source_keys and destination_keys have different length.
ValueError
When the keys list is empty.
Examples
--------
>>> copy_keys(
... source_bucket='bucket',
... source_keys=[
... 'myFiles/song.mp3',
... 'myFiles/photo.jpg'
... ],
... destination_bucket='bucket',
... destination_keys=[
... 'myMusic/song.mp3',
... 'myPhotos/photo.jpg'
... ]
... )
"""
if len(source_keys) != len(destination_keys):
raise IndexError("Key lists must have the same length")

if len(source_keys) == 0:
raise ValueError("Key list length must be greater than zero")

with futures.ThreadPoolExecutor(max_workers=threads) as executor:
executors = (
executor.submit(copy_object, source_bucket, source, destination_bucket, destination, aws_auth)
for source, destination in zip(source_keys, destination_keys)
)

for ex in executors:
ex.result()


def copy_prefix(
source_bucket: str,
source_prefix: str,
destination_bucket: str,
change_prefix: Optional[Tuple[str, str]] = None,
filter_keys: Optional[str] = None,
threads: int = 5,
aws_auth: Dict[str, str] = {}
) -> None:
"""Copy S3 objects from source bucket to destination based on prefix filter.
Parameters
----------
source_bucket : str
S3 bucket where the objects are stored.
source_prefix : str
S3 prefix where the objects are referenced.
destination_bucket : str
S3 destination bucket.
change_prefix : Tuple[str, str], optional
Text to be replaced in keys prefixes, by default is None.
The first element is the text to be replaced, the second is the replacement text.
filter_keys : str, optional
Basic search string to filter out keys on result (uses Unix shell-style wildcards), by default is None.
For more about the search check "fnmatch" package.
threads : int, optional
Number of parallel uploads, by default 5.
aws_auth: Dict[str, str]
Contains AWS credentials, by default is empty.
Examples
--------
>>> copy_prefix(
... source_bucket='MyBucket',
... source_prefix='myFiles',
... destination_bucket='OtherBucket',
... filter_keys='*images*',
... change_prefix=('myFiles', 'backup')
... )
"""
source_keys = list_objects(
bucket=source_bucket,
prefix=source_prefix,
search_str=filter_keys,
aws_auth=aws_auth
)

destination_keys = source_keys if change_prefix is None else [
key.replace(change_prefix[0], change_prefix[1])
for key in source_keys
]

copy_keys(
source_bucket=source_bucket,
source_keys=source_keys,
destination_bucket=destination_bucket,
destination_keys=destination_keys,
threads=threads,
aws_auth=aws_auth
)
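
The commit message mentions unit tests for the copy features, but they are not shown in this diff. For illustration, a minimal sketch of such a test, assuming moto for S3 mocking (an assumption; the repository's actual test setup is not visible here):

```python
import boto3
from moto import mock_s3  # assumed test dependency

from s3_tools import copy_object


@mock_s3
def test_copy_object():
    # Arrange: a fake bucket with one object
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="bucket")
    s3.put_object(Bucket="bucket", Key="myFiles/song.mp3", Body=b"notes")

    # Act: copy it under a new key
    copy_object(
        source_bucket="bucket",
        source_key="myFiles/song.mp3",
        destination_bucket="bucket",
        destination_key="myMusic/song.mp3",
        aws_auth={"region_name": "us-east-1"},
    )

    # Assert: the copy exists with the same content
    copied = s3.get_object(Bucket="bucket", Key="myMusic/song.mp3")
    assert copied["Body"].read() == b"notes"
```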
