Skip to content

Commit

Permalink
enh: missing S3 endpoint URL is verbosely communicated
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Apr 24, 2024
1 parent 503902b commit be672ce
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
0.58.7
- docs: document S3 environment variables
- enh: warn user about missing endpoint URLs in S3 format and raise an
exception when an endpoint URL is actually required
0.58.6
- enh: for access to private S3 data, introduce the environment
variables `DCLAB_S3_ENDPOINT_URL`, `DCLAB_S3_ACCESS_KEY_ID`,
Expand Down
11 changes: 11 additions & 0 deletions dclab/rtdc_dataset/fmt_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import socket
from urllib.parse import urlparse
import warnings


try:
Expand Down Expand Up @@ -67,6 +68,11 @@ def __init__(self,
verify_ssl: bool
make sure the SSL certificate is sound, only used for testing
"""
if endpoint_url is None:
raise ValueError(
"The S3 endpoint URL is empty. This could mean that you did "
"not specify the full S3 URL or that you forgot to set "
"the `S3_ENDPOINT_URL` environment variable.")
endpoint_url = endpoint_url.strip().rstrip("/")
self.botocore_session = botocore.session.get_session()
self.s3_session = boto3.Session(
Expand Down Expand Up @@ -230,6 +236,11 @@ def is_s3_object_available(url: str,
avail = False
if is_s3_url(url):
endpoint_url = get_endpoint_url(url) or S3_ENDPOINT_URL
if not endpoint_url:
warnings.warn(
f"Could not determine endpoint from URL '{url}'. Please "
f"set the `S3_ENDPOINT_URL` environment variable or pass "
f"a full object URL.")
# default to https if no scheme or port is specified
urlp = urlparse(endpoint_url)
port = urlp.port or (80 if urlp.scheme == "http" else 443)
Expand Down

0 comments on commit be672ce

Please sign in to comment.