diff --git a/CHANGELOG.md b/CHANGELOG.md index 90d7e5eb6..5ba6fe48b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [UNRELEASED] neptune-client 0.16.10 +### Features +- Track artifacts on S3-compatible storage ([#1053](https://github.com/neptune-ai/neptune-client/pull/1053)) + ### Fixes - Update jsonschema requirement with explicit `format` specifier ([#1010](https://github.com/neptune-ai/neptune-client/pull/1010)) - Escape inputs to SQL in Artifact LocalFileHashStorage ([#1034](https://github.com/neptune-ai/neptune-client/pull/1034)) diff --git a/src/neptune/new/envs.py b/src/neptune/new/envs.py index 221e6755e..a8625fd9b 100644 --- a/src/neptune/new/envs.py +++ b/src/neptune/new/envs.py @@ -37,3 +37,5 @@ NEPTUNE_SUBPROCESS_KILL_TIMEOUT = "NEPTUNE_SUBPROCESS_KILL_TIMEOUT" NEPTUNE_FETCH_TABLE_STEP_SIZE = "NEPTUNE_FETCH_TABLE_STEP_SIZE" + +S3_ENDPOINT_URL = "S3_ENDPOINT_URL" diff --git a/src/neptune/new/internal/artifacts/drivers/s3.py b/src/neptune/new/internal/artifacts/drivers/s3.py index c4a9a1a8a..8a198729f 100644 --- a/src/neptune/new/internal/artifacts/drivers/s3.py +++ b/src/neptune/new/internal/artifacts/drivers/s3.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import os import pathlib import typing from datetime import datetime @@ -21,6 +22,7 @@ import boto3 from botocore.exceptions import NoCredentialsError +from neptune.new.envs import S3_ENDPOINT_URL from neptune.new.exceptions import ( NeptuneRemoteStorageAccessException, NeptuneRemoteStorageCredentialsException, @@ -36,6 +38,22 @@ class S3ArtifactDriver(ArtifactDriver): DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" + @staticmethod + def get_boto_resource(): + """ + Users might want to use `S3` providers other than `AWS`, so we should be able to override `endpoint_url`. + Unfortunately `boto3` doesn't support this parameter in configuration, so we have to add our own env variable. 
+ boto3 supported config envs: + * https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables + boto3 `endpoint_url` support PR: + * https://github.com/boto/boto3/pull/2746 + """ + endpoint_url = os.getenv(S3_ENDPOINT_URL) + return boto3.resource( + service_name="s3", + endpoint_url=endpoint_url, + ) + @staticmethod def get_type() -> str: return ArtifactFileType.S3.value @@ -69,7 +87,7 @@ def get_tracked_files(cls, path: str, destination: str = None) -> typing.List[Ar ) # pylint: disable=no-member - remote_storage = boto3.resource("s3").Bucket(bucket_name) + remote_storage = cls.get_boto_resource().Bucket(bucket_name) stored_files: typing.List[ArtifactFileData] = list() @@ -115,7 +133,7 @@ def download_file(cls, destination: pathlib.Path, file_definition: ArtifactFileD url = urlparse(location) bucket_name, path = url.netloc, url.path.lstrip("/") - remote_storage = boto3.resource("s3") + remote_storage = cls.get_boto_resource() try: # pylint: disable=no-member bucket = remote_storage.Bucket(bucket_name)