From 13365ef2a3166ff5a11004db94e66fa5e2c43772 Mon Sep 17 00:00:00 2001 From: jkushner Date: Fri, 21 Oct 2022 12:03:23 +0200 Subject: [PATCH 1/4] Initial GCS S3 support --- .../new/internal/artifacts/drivers/s3.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/neptune/new/internal/artifacts/drivers/s3.py b/src/neptune/new/internal/artifacts/drivers/s3.py index c4a9a1a8a..2c1b18ed8 100644 --- a/src/neptune/new/internal/artifacts/drivers/s3.py +++ b/src/neptune/new/internal/artifacts/drivers/s3.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import os import pathlib import typing from datetime import datetime @@ -36,6 +37,18 @@ class S3ArtifactDriver(ArtifactDriver): DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" + @staticmethod + def get_boto_resource(): + access_key_id = os.getenv("S3_ACCESS_KEY_ID") + secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY") + endpoint_url = os.getenv("S3_ENDPOINT_URL") + return boto3.resource( + service_name="s3", + aws_access_key_id=access_key_id, + aws_secret_access_key=secret_access_key, + endpoint_url=endpoint_url, + ) + @staticmethod def get_type() -> str: return ArtifactFileType.S3.value @@ -69,7 +82,7 @@ def get_tracked_files(cls, path: str, destination: str = None) -> typing.List[Ar ) # pylint: disable=no-member - remote_storage = boto3.resource("s3").Bucket(bucket_name) + remote_storage = cls.get_boto_resource().Bucket(bucket_name) stored_files: typing.List[ArtifactFileData] = list() @@ -115,7 +128,7 @@ def download_file(cls, destination: pathlib.Path, file_definition: ArtifactFileD url = urlparse(location) bucket_name, path = url.netloc, url.path.lstrip("/") - remote_storage = boto3.resource("s3") + remote_storage = cls.get_boto_resource() try: # pylint: disable=no-member bucket = remote_storage.Bucket(bucket_name) From 6d50fafaf7b27210e6af283bac9158c5cf4c8f1b Mon Sep 17 00:00:00 2001 From: Jakub Kuszneruk Date: 
Fri, 21 Oct 2022 16:11:10 +0200 Subject: [PATCH 2/4] Boto3 S3_ENDPOINT_URL config --- src/neptune/new/internal/artifacts/drivers/s3.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/neptune/new/internal/artifacts/drivers/s3.py b/src/neptune/new/internal/artifacts/drivers/s3.py index 2c1b18ed8..0cc096761 100644 --- a/src/neptune/new/internal/artifacts/drivers/s3.py +++ b/src/neptune/new/internal/artifacts/drivers/s3.py @@ -39,13 +39,17 @@ class S3ArtifactDriver(ArtifactDriver): @staticmethod def get_boto_resource(): - access_key_id = os.getenv("S3_ACCESS_KEY_ID") - secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY") + """ + Users might want to use `S3` providers other than `AWS`, so we should be able to override `endpoint_url`. + Unfortunately, `boto3` doesn't support this parameter in its configuration, so we define our own env variable. + boto3 supported config envs: + * https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables + boto3 `endpoint_url` support PR: + * https://github.com/boto/boto3/pull/2746 + """ endpoint_url = os.getenv("S3_ENDPOINT_URL") return boto3.resource( service_name="s3", - aws_access_key_id=access_key_id, - aws_secret_access_key=secret_access_key, endpoint_url=endpoint_url, ) From 59b96f51e8efa7a3288d648b2b728ee268600c63 Mon Sep 17 00:00:00 2001 From: Jakub Kuszneruk Date: Tue, 25 Oct 2022 10:20:50 +0200 Subject: [PATCH 3/4] Post review fixes --- CHANGELOG.md | 1 + src/neptune/new/envs.py | 2 ++ src/neptune/new/internal/artifacts/drivers/s3.py | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90d7e5eb6..c7a530f92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - `get_last_run`, `get_run_url`, `get_project` and `neptune.init` marked as deprecated ([#1011](https://github.com/neptune-ai/neptune-client/pull/1011)) - Deprecated implicit casting of objects to strings with `log` and `assign` 
operations ([#1028](https://github.com/neptune-ai/neptune-client/pull/1028)) - Internally extracted legacy client to `legacy` submodule ([#1039](https://github.com/neptune-ai/neptune-client/pull/1039)) +- Track artifacts on S3 compatible storage ([#1053](https://github.com/neptune-ai/neptune-client/pull/1053)) ## neptune-client 0.16.9 diff --git a/src/neptune/new/envs.py b/src/neptune/new/envs.py index 221e6755e..a8625fd9b 100644 --- a/src/neptune/new/envs.py +++ b/src/neptune/new/envs.py @@ -37,3 +37,5 @@ NEPTUNE_SUBPROCESS_KILL_TIMEOUT = "NEPTUNE_SUBPROCESS_KILL_TIMEOUT" NEPTUNE_FETCH_TABLE_STEP_SIZE = "NEPTUNE_FETCH_TABLE_STEP_SIZE" + +S3_ENDPOINT_URL = "S3_ENDPOINT_URL" diff --git a/src/neptune/new/internal/artifacts/drivers/s3.py b/src/neptune/new/internal/artifacts/drivers/s3.py index 0cc096761..8a198729f 100644 --- a/src/neptune/new/internal/artifacts/drivers/s3.py +++ b/src/neptune/new/internal/artifacts/drivers/s3.py @@ -22,6 +22,7 @@ import boto3 from botocore.exceptions import NoCredentialsError +from neptune.new.envs import S3_ENDPOINT_URL from neptune.new.exceptions import ( NeptuneRemoteStorageAccessException, NeptuneRemoteStorageCredentialsException, @@ -47,7 +48,7 @@ def get_boto_resource(): boto3 `endpoint_url` support PR: * https://github.com/boto/boto3/pull/2746 """ - endpoint_url = os.getenv("S3_ENDPOINT_URL") + endpoint_url = os.getenv(S3_ENDPOINT_URL) return boto3.resource( service_name="s3", endpoint_url=endpoint_url, From 56b00c23ff7dfe001fcbbc7938bdde70995e2ba8 Mon Sep 17 00:00:00 2001 From: Jakub Kuszneruk Date: Tue, 25 Oct 2022 10:23:15 +0200 Subject: [PATCH 4/4] Update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a530f92..5ba6fe48b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [UNRELEASED] neptune-client 0.16.10 +### Features +- Track artifacts on S3 compatible storage ([#1053](https://github.com/neptune-ai/neptune-client/pull/1053)) 
+ ### Fixes - Update jsonschema requirement with explicit `format` specifier ([#1010](https://github.com/neptune-ai/neptune-client/pull/1010)) - Escape inputs to SQL in Artifact LocalFileHashStorage ([#1034](https://github.com/neptune-ai/neptune-client/pull/1034)) @@ -11,7 +14,6 @@ - `get_last_run`, `get_run_url`, `get_project` and `neptune.init` marked as deprecated ([#1011](https://github.com/neptune-ai/neptune-client/pull/1011)) - Deprecated implicit casting of objects to strings with `log` and `assign` operations ([#1028](https://github.com/neptune-ai/neptune-client/pull/1028)) - Internally extracted legacy client to `legacy` submodule ([#1039](https://github.com/neptune-ai/neptune-client/pull/1039)) -- Track artifacts on S3 compatible storage ([#1053](https://github.com/neptune-ai/neptune-client/pull/1053)) ## neptune-client 0.16.9