Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track artifacts on S3 compatible storage #1053

Merged
merged 4 commits into from Oct 25, 2022
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 19 additions & 2 deletions src/neptune/new/internal/artifacts/drivers/s3.py
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
Raalsky marked this conversation as resolved.
Show resolved Hide resolved
# limitations under the License.
#
import os
import pathlib
import typing
from datetime import datetime
Expand All @@ -36,6 +37,22 @@
class S3ArtifactDriver(ArtifactDriver):
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"

@staticmethod
def get_boto_resource():
"""
Users might want to use an `S3`-compatible provider other than `AWS`, so we need to be able to override `endpoint_url`.
Unfortunately, `boto3` doesn't support setting this parameter through its configuration, so we have to introduce our own environment variable.
boto3 supported config envs:
* https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables
boto3 `endpoint_url` support PR:
* https://github.com/boto/boto3/pull/2746
"""
endpoint_url = os.getenv("S3_ENDPOINT_URL")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably this should be a part of neptune.new.envs

return boto3.resource(
service_name="s3",
endpoint_url=endpoint_url,
)

@staticmethod
def get_type() -> str:
return ArtifactFileType.S3.value
Expand Down Expand Up @@ -69,7 +86,7 @@ def get_tracked_files(cls, path: str, destination: str = None) -> typing.List[Ar
)

# pylint: disable=no-member
remote_storage = boto3.resource("s3").Bucket(bucket_name)
remote_storage = cls.get_boto_resource().Bucket(bucket_name)

stored_files: typing.List[ArtifactFileData] = list()

Expand Down Expand Up @@ -115,7 +132,7 @@ def download_file(cls, destination: pathlib.Path, file_definition: ArtifactFileD
url = urlparse(location)
bucket_name, path = url.netloc, url.path.lstrip("/")

remote_storage = boto3.resource("s3")
remote_storage = cls.get_boto_resource()
try:
# pylint: disable=no-member
bucket = remote_storage.Bucket(bucket_name)
Expand Down