From 0b2e444856fed3da3113751d2e81da2afc4a8568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20M=C3=BCller?= Date: Wed, 20 Apr 2022 19:52:33 +0200 Subject: [PATCH] feat: pull DCOR access token management from downstream DCOR-Aid and allow to specify certs bundle search paths --- CHANGELOG | 6 +- dclab/rtdc_dataset/fmt_dcor.py | 404 -------------------- dclab/rtdc_dataset/fmt_dcor/__init__.py | 287 ++++++++++++++ dclab/rtdc_dataset/fmt_dcor/access_token.py | 52 +++ dclab/rtdc_dataset/fmt_dcor/features.py | 177 +++++++++ tests/conftest.py | 3 + tests/data/example_access_token.dcor-access | Bin 0 -> 2014 bytes tests/test_rtdc_fmt_dcor_access_token.py | 40 ++ 8 files changed, 564 insertions(+), 405 deletions(-) delete mode 100644 dclab/rtdc_dataset/fmt_dcor.py create mode 100644 dclab/rtdc_dataset/fmt_dcor/__init__.py create mode 100644 dclab/rtdc_dataset/fmt_dcor/access_token.py create mode 100644 dclab/rtdc_dataset/fmt_dcor/features.py create mode 100644 tests/data/example_access_token.dcor-access create mode 100644 tests/test_rtdc_fmt_dcor_access_token.py diff --git a/CHANGELOG b/CHANGELOG index 87756b81..e8cb05b1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,11 @@ -0.40.1 +0.41.0 + - feat: pull DCOR access token management from downstream DCOR-Aid + - feat: allow to set alternate DCOR server certificate bundles by + appending paths `dclab.rtdc_dataset.fmt_dcor.DCOR_CERTS_SEARCH_PATHS` - enh: allow to skip checks and by default use the innate features during export of an RTDCBase - setup: remove appveyor build pipeline + - ref: cleanup fmt_dcor 0.40.0 - setup: bump numpy from 1.17.0 to 1.21.0 - setup: bump scipy from 0.14.0 to 1.8.0 diff --git a/dclab/rtdc_dataset/fmt_dcor.py b/dclab/rtdc_dataset/fmt_dcor.py deleted file mode 100644 index b317e41d..00000000 --- a/dclab/rtdc_dataset/fmt_dcor.py +++ /dev/null @@ -1,404 +0,0 @@ -"""DCOR client interface""" -from functools import lru_cache -import json -import numbers -import time -import uuid - -import numpy as np - 
-from .. import definitions as dfn -from ..util import hashobj - -from .config import Configuration -from .core import RTDCBase - -try: - import requests -except ModuleNotFoundError: - REQUESTS_AVAILABLE = False -else: - REQUESTS_AVAILABLE = True - - -class DCORAccessError(BaseException): - pass - - -class APIHandler: - """Handles the DCOR api with caching for simple queries""" - #: these are cached to minimize network usage - cache_queries = ["metadata", "size", "feature_list", "valid"] - #: DCOR API Keys in the current session - api_keys = [] - - def __init__(self, url, api_key=""): - self.url = url - self.api_key = api_key - self._cache = {} - - @classmethod - def add_api_key(cls, api_key): - """Add an API Key to the base class - - When accessing the DCOR API, all available API Keys are - used to access a resource (trial and error). - """ - if api_key.strip() and api_key not in APIHandler.api_keys: - APIHandler.api_keys.append(api_key) - - def _get(self, query, feat=None, trace=None, event=None, api_key="", - retries=3): - qstr = "&query={}".format(query) - if feat is not None: - qstr += "&feature={}".format(feat) - if trace is not None: - qstr += "&trace={}".format(trace) - if event is not None: - qstr += "&event={}".format(event) - apicall = self.url + qstr - for _ in range(retries): - req = requests.get(apicall, headers={"Authorization": api_key}) - try: - jreq = req.json() - except json.decoder.JSONDecodeError: - time.sleep(0.1) # wait a bit, maybe the server is overloaded - continue - else: - break - else: - raise DCORAccessError("Could not complete query '{}', because " - "the response did not contain any JSON-" - "parseable data. 
Retried {} times.".format( - apicall, retries)) - return jreq - - def get(self, query, feat=None, trace=None, event=None): - if query in APIHandler.cache_queries and query in self._cache: - result = self._cache[query] - else: - for api_key in [self.api_key] + APIHandler.api_keys: - req = self._get(query, feat, trace, event, api_key) - if req["success"]: - self.api_key = api_key # remember working key - break - else: - raise DCORAccessError("Cannot access {}: {}".format( - query, req["error"]["message"])) - result = req["result"] - if query in APIHandler.cache_queries: - self._cache[query] = result - return result - - -class DCORNonScalarFeature: - """Helper class for accessing non-scalar features""" - - def __init__(self, feat, api, size): - self.identifier = api.url + ":" + feat # for caching ancillaries - self.feat = feat - self.api = api - self._size = size - - def __iter__(self): - for idx in range(len(self)): - yield self[idx] - - def __getitem__(self, event): - if not isinstance(event, numbers.Integral): - # slicing! 
- indices = np.arange(len(self))[event] - trace0 = self._get_item(indices[0]) - # determine the correct shape from the first feature - oshape = [len(indices)] + list(trace0.shape) - output = np.zeros(oshape, dtype=trace0.dtype) - # populate the output array - for ii, evid in enumerate(indices): - output[ii] = self._get_item(evid) - return output - else: - return self._get_item(event) - - def __len__(self): - return self._size - - @lru_cache(maxsize=100) - def _get_item(self, event): - data = self.api.get(query="feature", feat=self.feat, event=event) - return np.asarray(data) - - -class DCORContourFeature(DCORNonScalarFeature): - """Helper class for accessing contour data""" - - def __init__(self, feat, api, size): - super(DCORContourFeature, self).__init__(feat, api, size) - self.shape = (size, np.nan, 2) - - def __getitem__(self, event): - if not isinstance(event, numbers.Integral): - # We cannot use the original method, because contours - # may have different sizes! So we return a list. 
- indices = np.arange(len(self))[event] - output = [] - # populate the output list - for evid in indices: - output.append(self._get_item(evid)) - return output - else: - return self._get_item(event) - - -class DCORImageFeature(DCORNonScalarFeature): - """Helper class for accessing image data""" - - def __init__(self, feat, api, size): - super(DCORImageFeature, self).__init__(feat, api, size) - metadata = self.api.get(query="metadata") - self.shape = (size, - metadata["imaging"]["roi size y"], - metadata["imaging"]["roi size x"]) - - -class DCORTraceItem(DCORNonScalarFeature): - """Helper class for accessing traces""" - def __init__(self, feat, api, size, samples_per_event): - super(DCORTraceItem, self).__init__(feat, api, size) - self.shape = (size, samples_per_event) - - @lru_cache(maxsize=100) - def _get_item(self, event): - data = self.api.get(query="feature", feat="trace", - trace=self.feat, event=event) - return np.asarray(data) - - -class DCORTraceFeature: - """Helper class for accessing traces""" - - def __init__(self, api, size): - self.identifier = api.url + ":traces" - self.api = api - self._size = size - metadata = self.api.get(query="metadata") - self._samples_per_event = metadata["fluorescence"]["samples per event"] - self.traces = api.get(query="trace_list") - self._trace_objects = {} - - self.shape = (len(self.traces), - size, - self._samples_per_event - ) - - def __contains__(self, key): - return key in self.traces - - def __getitem__(self, trace): - if trace in self.traces: - if trace not in self._trace_objects: - self._trace_objects[trace] = DCORTraceItem( - trace, self.api, self._size, self._samples_per_event) - return self._trace_objects[trace] - else: - raise KeyError("trace '{}' not found!".format(trace)) - - def __len__(self): - return len(self.traces) - - def keys(self): - return self.traces - - -class FeatureCache: - """Download and cache (scalar only) features from DCOR""" - - def __init__(self, api, size): - self.api = api - 
self._features = self.api.get(query="feature_list") - self._size = size - self._scalar_cache = {} - self._nonsc_features = {} - - def __contains__(self, key): - return key in self._features - - def __getitem__(self, key): - # user-level checking is done in core.py - assert dfn.feature_exists(key) - if key not in self._features: - raise KeyError("Feature '{}' not found!".format(key)) - - if key in self._scalar_cache: - return self._scalar_cache[key] - elif dfn.scalar_feature_exists(key): - # download the feature and cache it - feat = np.asarray(self.api.get(query="feature", feat=key)) - self._scalar_cache[key] = feat - return feat - elif key == "contour": - if key not in self._nonsc_features: - self._nonsc_features[key] = DCORContourFeature(key, self.api, - self._size) - return self._nonsc_features[key] - elif key == "trace": - if "trace" not in self._nonsc_features: - self._nonsc_features["trace"] = DCORTraceFeature(self.api, - self._size) - return self._nonsc_features["trace"] - elif key in ["image", "mask"]: - self._nonsc_features[key] = DCORImageFeature(key, self.api, - self._size) - return self._nonsc_features[key] - else: - raise ValueError(f"No DCOR handler for feature '{key}'!") - - def __iter__(self): - # dict-like behavior - for key in self.keys(): - yield key - - def keys(self): - return self._features - - -class RTDC_DCOR(RTDCBase): - def __init__(self, url, use_ssl=None, host="dcor.mpl.mpg.de", - api_key="", *args, **kwargs): - """Wrap around the DCOR API - - Parameters - ---------- - url: str - Full URL or resource identifier; valid values are - - - `<https://dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f661-4920-be79-5ff4e85915d5>`_ - - dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f - 661-4920-be79-5ff4e85915d5 - - b1404eb5-f661-4920-be79-5ff4e85915d5 - use_ssl: bool - Set this to False to disable SSL (should only be used for - testing). Defaults to None (does not force SSL if the URL - starts with "http://"). 
- host: str - The host machine (used if the host is not given in `url`) - api_key: str - API key to access private resources - *args: - Arguments for `RTDCBase` - **kwargs: - Keyword arguments for `RTDCBase` - - Attributes - ---------- - path: str - Full URL to the DCOR resource - """ - if not REQUESTS_AVAILABLE: - raise ModuleNotFoundError( - "Package `requests` required for DCOR format!") - - super(RTDC_DCOR, self).__init__(*args, **kwargs) - - self._hash = None - self.path = RTDC_DCOR.get_full_url(url, use_ssl, host) - self.api = APIHandler(url=self.path, api_key=api_key) - - # Parse configuration - self.config = Configuration(cfg=self.api.get(query="metadata")) - - # Get size - self._size = int(self.api.get(query="size")) - - # Setup events - self._events = FeatureCache(self.api, size=self._size) - - # Override logs property with HDF5 data - self.logs = {} - - self.title = "{} - M{}".format(self.config["experiment"]["sample"], - self.config["experiment"]["run index"]) - - # Set up filtering - self._init_filters() - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - pass - - def __len__(self): - return self._size - - @staticmethod - def get_full_url(url, use_ssl, host): - """Return the full URL to a DCOR resource - - Parameters - ---------- - url: str - Full URL or resource identifier; valid values are - - - https://dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6- - df12-4299-aa2e-089e390aafd5' - - dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-df12- - 4299-aa2e-089e390aafd5 - - caab96f6-df12-4299-aa2e-089e390aafd5 - use_ssl: bool - Set this to False to disable SSL (should only be used for - testing). Defaults to None (does not force SSL if the URL - starts with "http://"). 
- host: str - Use this host if it is not specified in `url` - """ - if use_ssl is None: - if url.startswith("http://"): - # user wanted it that way - web = "http" - else: - web = "https" - elif use_ssl: - web = "https" - else: - web = "http" - if url.count("://"): - base = url.split("://", 1)[1] - else: - base = url - if base.count("/"): - host, api = base.split("/", 1) - else: - api = "api/3/action/dcserv?id=" + base - new_url = "{}://{}/{}".format(web, host, api) - return new_url - - @property - def hash(self): - """Hash value based on file name and content""" - if self._hash is None: - tohash = [self.path] - self._hash = hashobj(tohash) - return self._hash - - -def is_dcor_url(string): - if not isinstance(string, str): - return False - elif is_uuid(string): - return True - else: # we have a string - if string.startswith("http://") or string.startswith("https://"): - return True # pretty safe bet - elif string.count("/api/3/action/dcserv?id="): - return True # not so safe, but highly improbable folder structure - else: - return False - - -def is_uuid(string): - try: - uuid_obj = uuid.UUID(string) - except ValueError: - return False - return str(uuid_obj) == string diff --git a/dclab/rtdc_dataset/fmt_dcor/__init__.py b/dclab/rtdc_dataset/fmt_dcor/__init__.py new file mode 100644 index 00000000..73d9edec --- /dev/null +++ b/dclab/rtdc_dataset/fmt_dcor/__init__.py @@ -0,0 +1,287 @@ +"""DCOR client interface""" +import json +import pathlib +import time +import uuid + +from ...util import hashobj + +from ..config import Configuration +from ..core import RTDCBase + +from .features import FeatureCache + + +try: + import requests +except ModuleNotFoundError: + REQUESTS_AVAILABLE = False +else: + REQUESTS_AVAILABLE = True + +#: Append directories here where dclab should look for certificate bundles +#: for a specific host. The directory should contain files named after the +#: hostname, e.g. "dcor.mpl.mpg.de.cert". 
+DCOR_CERTS_SEARCH_PATHS = [] + + +class DCORAccessError(BaseException): + pass + + +class APIHandler: + """Handles the DCOR api with caching for simple queries""" + #: these are cached to minimize network usage + cache_queries = ["metadata", "size", "feature_list", "valid"] + #: DCOR API Keys in the current session + api_keys = [] + + def __init__(self, url, api_key="", cert_path=None): + """ + + Parameters + ---------- + url: str + URL to DCOR API + api_key: str + DCOR API token + cert_path: pathlib.Path + the path to the server's CA bundle; by default this + will use the default certificates (which depends on + from where you obtained certifi/requests) + """ + #: DCOR API URL + self.url = url + #: keyword argument to :func:`requests.request` + self.verify = cert_path or True + #: DCOR API token + self.api_key = api_key + self._cache = {} + + @classmethod + def add_api_key(cls, api_key): + """Add an API Key to the base class + + When accessing the DCOR API, all available API Keys are + used to access a resource (trial and error). + """ + if api_key.strip() and api_key not in APIHandler.api_keys: + APIHandler.api_keys.append(api_key) + + def _get(self, query, feat=None, trace=None, event=None, api_key="", + retries=3): + qstr = f"&query={query}" + if feat is not None: + qstr += f"&feature={feat}" + if trace is not None: + qstr += f"&trace={trace}" + if event is not None: + qstr += f"&event={event}" + apicall = self.url + qstr + for _ in range(retries): + req = requests.get(apicall, + headers={"Authorization": api_key}, + verify=self.verify) + try: + jreq = req.json() + except json.decoder.JSONDecodeError: + time.sleep(0.1) # wait a bit, maybe the server is overloaded + continue + else: + break + else: + raise DCORAccessError(f"Could not complete query '{apicall}', " + "because the response did not contain any " + f"JSON-parseable data. 
Retried {retries} " + "times.") + return jreq + + def get(self, query, feat=None, trace=None, event=None): + if query in APIHandler.cache_queries and query in self._cache: + result = self._cache[query] + else: + req = {"error": {"message": "No access to API (api key?)"}} + for api_key in [self.api_key] + APIHandler.api_keys: + req = self._get(query, feat, trace, event, api_key) + if req["success"]: + self.api_key = api_key # remember working key + break + else: + raise DCORAccessError( + f"Cannot access {query}: {req['error']['message']}") + result = req["result"] + if query in APIHandler.cache_queries: + self._cache[query] = result + return result + + +class RTDC_DCOR(RTDCBase): + def __init__(self, url, host="dcor.mpl.mpg.de", api_key="", + use_ssl=None, cert_path=None, *args, **kwargs): + """Wrap around the DCOR API + + Parameters + ---------- + url: str + Full URL or resource identifier; valid values are + + - `<https://dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f661-4920-be79-5ff4e85915d5>`_ + - dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f + 661-4920-be79-5ff4e85915d5 + - b1404eb5-f661-4920-be79-5ff4e85915d5 + host: str + The host machine (used if the host is not given in `url`) + api_key: str + API key to access private resources + use_ssl: bool + Set this to False to disable SSL (should only be used for + testing). Defaults to None (does not force SSL if the URL + starts with "http://"). + cert_path: pathlib.Path + The (optional) path to a server CA bundle; this should only + be necessary for DCOR instances in the intranet with a custom + CA or for certificate pinning. 
+ *args: + Arguments for `RTDCBase` + **kwargs: + Keyword arguments for `RTDCBase` + + Attributes + ---------- + path: str + Full URL to the DCOR resource + """ + if not REQUESTS_AVAILABLE: + raise ModuleNotFoundError( + "Package `requests` required for DCOR format!") + + super(RTDC_DCOR, self).__init__(*args, **kwargs) + + self._hash = None + self.path = RTDC_DCOR.get_full_url(url, use_ssl, host) + + if cert_path is None: + cert_path = get_server_cert_path(get_host_from_url(self.path)) + + self.api = APIHandler(url=self.path, api_key=api_key, + cert_path=cert_path) + + # Parse configuration + self.config = Configuration(cfg=self.api.get(query="metadata")) + + # Get size + self._size = int(self.api.get(query="size")) + + # Setup events + self._events = FeatureCache(self.api, size=self._size) + + # Override logs property with HDF5 data + self.logs = {} + + self.title = f"{self.config['experiment']['sample']} - " \ + + f"M{self.config['experiment']['run index']}" + + # Set up filtering + self._init_filters() + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + pass + + def __len__(self): + return self._size + + @staticmethod + def get_full_url(url, use_ssl, host): + """Return the full URL to a DCOR resource + + Parameters + ---------- + url: str + Full URL or resource identifier; valid values are + + - https://dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6- + df12-4299-aa2e-089e390aafd5' + - dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-df12- + 4299-aa2e-089e390aafd5 + - caab96f6-df12-4299-aa2e-089e390aafd5 + use_ssl: bool + Set this to False to disable SSL (should only be used for + testing). Defaults to None (does not force SSL if the URL + starts with "http://"). 
+ host: str + Use this host if it is not specified in `url` + """ + if use_ssl is None: + if url.startswith("http://"): + # user wanted it that way + web = "http" + else: + web = "https" + elif use_ssl: + web = "https" + else: + web = "http" + if url.count("://"): + base = url.split("://", 1)[1] + else: + base = url + if base.count("/"): + host, api = base.split("/", 1) + else: + api = "api/3/action/dcserv?id=" + base + new_url = f"{web}://{host}/{api}" + return new_url + + @property + def hash(self): + """Hash value based on file name and content""" + if self._hash is None: + tohash = [self.path] + self._hash = hashobj(tohash) + return self._hash + + +def get_host_from_url(url): + """Extract the hostname from a URL""" + return url.split("://")[1].split("/")[0] + + +def get_server_cert_path(host): + """Return server certificate bundle for DCOR `host`""" + + for path in DCOR_CERTS_SEARCH_PATHS: + path = pathlib.Path(path) + cert_path = path / f"{host}.cert" + if cert_path.exists(): + break + else: + # use default certificate bundle + cert_path = requests.certs.where() + + return cert_path + + +def is_dcor_url(string): + if not isinstance(string, str): + return False + elif is_uuid(string): + return True + else: # we have a string + if string.startswith("http://") or string.startswith("https://"): + return True # pretty safe bet + elif string.count("/api/3/action/dcserv?id="): + return True # not so safe, but highly improbable folder structure + else: + return False + + +def is_uuid(string): + try: + uuid_obj = uuid.UUID(string) + except ValueError: + return False + return str(uuid_obj) == string diff --git a/dclab/rtdc_dataset/fmt_dcor/access_token.py b/dclab/rtdc_dataset/fmt_dcor/access_token.py new file mode 100644 index 00000000..11a31590 --- /dev/null +++ b/dclab/rtdc_dataset/fmt_dcor/access_token.py @@ -0,0 +1,52 @@ +"""DCOR-med access token (SSL certificate + CKAN token)""" +import pathlib +import ssl +import tempfile +import zipfile + + +def 
get_api_key(access_token_path, password): + """Extract the API key / API token from an encrypted DCOR access token""" + if isinstance(password, str): + password = password.encode("utf-8") + with zipfile.ZipFile(access_token_path) as arc: + api_key = arc.read("api_key.txt", pwd=password).decode().strip() + return api_key + + +def get_certificate(access_token_path, password): + """Extract the certificate bundle from an encrypted DCOR access token""" + if isinstance(password, str): + password = password.encode("utf-8") + with zipfile.ZipFile(access_token_path) as arc: + cert_data = arc.read("server.cert", pwd=password) + return cert_data + + +def get_hostname(access_token_path, password): + """Extract the hostname from an encrypted DCOR access token""" + cert_data = get_certificate(access_token_path, password) + with tempfile.TemporaryDirectory(prefix="dcoraid_access_token_") as td: + cfile = pathlib.Path(td) / "server.cert" + cfile.write_bytes(cert_data) + # Dear future-self, + # + # I know that this will probably not have been a good solution. + # Anyway, I still decided to use this private function from the + # built-in ssl module to avoid additional dependencies. Just so + # you know: If you happen to be in trouble now because of this, + # bear in mind that you had enough time to at least ask for the + # functionality to be implemented in the requests library. Look + # how I kept the lines all the same length! 
+ # + # Cheers, + # Paul + cert_dict = ssl._ssl._test_decode_cert(str(cfile)) + # get the common name + for ((key, value),) in cert_dict["subject"]: + if key == "commonName": + hostname = value.strip() + break + else: + raise KeyError("Could not extract hostname from certificate!") + return hostname diff --git a/dclab/rtdc_dataset/fmt_dcor/features.py b/dclab/rtdc_dataset/fmt_dcor/features.py new file mode 100644 index 00000000..d51f42cd --- /dev/null +++ b/dclab/rtdc_dataset/fmt_dcor/features.py @@ -0,0 +1,177 @@ +"""DCOR feature handling""" +from functools import lru_cache +import numbers + +import numpy as np + +from ... import definitions as dfn + + +class DCORNonScalarFeature: + """Helper class for accessing non-scalar features""" + + def __init__(self, feat, api, size): + self.identifier = api.url + ":" + feat # for caching ancillaries + self.feat = feat + self.api = api + self._size = size + + def __iter__(self): + for idx in range(len(self)): + yield self[idx] + + def __getitem__(self, event): + if not isinstance(event, numbers.Integral): + # slicing! 
+ indices = np.arange(len(self))[event] + trace0 = self._get_item(indices[0]) + # determine the correct shape from the first feature + oshape = [len(indices)] + list(trace0.shape) + output = np.zeros(oshape, dtype=trace0.dtype) + # populate the output array + for ii, evid in enumerate(indices): + output[ii] = self._get_item(evid) + return output + else: + return self._get_item(event) + + def __len__(self): + return self._size + + @lru_cache(maxsize=100) + def _get_item(self, event): + data = self.api.get(query="feature", feat=self.feat, event=event) + return np.asarray(data) + + +class DCORContourFeature(DCORNonScalarFeature): + """Helper class for accessing contour data""" + + def __init__(self, feat, api, size): + super(DCORContourFeature, self).__init__(feat, api, size) + self.shape = (size, np.nan, 2) + + def __getitem__(self, event): + if not isinstance(event, numbers.Integral): + # We cannot use the original method, because contours + # may have different sizes! So we return a list. 
+ indices = np.arange(len(self))[event] + output = [] + # populate the output list + for evid in indices: + output.append(self._get_item(evid)) + return output + else: + return self._get_item(event) + + +class DCORImageFeature(DCORNonScalarFeature): + """Helper class for accessing image data""" + + def __init__(self, feat, api, size): + super(DCORImageFeature, self).__init__(feat, api, size) + metadata = self.api.get(query="metadata") + self.shape = (size, + metadata["imaging"]["roi size y"], + metadata["imaging"]["roi size x"]) + + +class DCORTraceItem(DCORNonScalarFeature): + """Helper class for accessing traces""" + def __init__(self, feat, api, size, samples_per_event): + super(DCORTraceItem, self).__init__(feat, api, size) + self.shape = (size, samples_per_event) + + @lru_cache(maxsize=100) + def _get_item(self, event): + data = self.api.get(query="feature", feat="trace", + trace=self.feat, event=event) + return np.asarray(data) + + +class DCORTraceFeature: + """Helper class for accessing traces""" + + def __init__(self, api, size): + self.identifier = api.url + ":traces" + self.api = api + self._size = size + metadata = self.api.get(query="metadata") + self._samples_per_event = metadata["fluorescence"]["samples per event"] + self.traces = api.get(query="trace_list") + self._trace_objects = {} + + self.shape = (len(self.traces), + size, + self._samples_per_event + ) + + def __contains__(self, key): + return key in self.traces + + def __getitem__(self, trace): + if trace in self.traces: + if trace not in self._trace_objects: + self._trace_objects[trace] = DCORTraceItem( + trace, self.api, self._size, self._samples_per_event) + return self._trace_objects[trace] + else: + raise KeyError(f"trace '{trace}' not found!") + + def __len__(self): + return len(self.traces) + + def keys(self): + return self.traces + + +class FeatureCache: + """Download and cache (scalar only) features from DCOR""" + + def __init__(self, api, size): + self.api = api + self._features = 
self.api.get(query="feature_list") + self._size = size + self._scalar_cache = {} + self._nonsc_features = {} + + def __contains__(self, key): + return key in self._features + + def __getitem__(self, key): + # user-level checking is done in core.py + assert dfn.feature_exists(key) + if key not in self._features: + raise KeyError(f"Feature '{key}' not found!") + + if key in self._scalar_cache: + return self._scalar_cache[key] + elif dfn.scalar_feature_exists(key): + # download the feature and cache it + feat = np.asarray(self.api.get(query="feature", feat=key)) + self._scalar_cache[key] = feat + return feat + elif key == "contour": + if key not in self._nonsc_features: + self._nonsc_features[key] = DCORContourFeature(key, self.api, + self._size) + return self._nonsc_features[key] + elif key == "trace": + if "trace" not in self._nonsc_features: + self._nonsc_features["trace"] = DCORTraceFeature(self.api, + self._size) + return self._nonsc_features["trace"] + elif key in ["image", "mask"]: + self._nonsc_features[key] = DCORImageFeature(key, self.api, + self._size) + return self._nonsc_features[key] + else: + raise ValueError(f"No DCOR handler for feature '{key}'!") + + def __iter__(self): + # dict-like behavior + for key in self.keys(): + yield key + + def keys(self): + return self._features diff --git a/tests/conftest.py b/tests/conftest.py index 72036128..99a9f204 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import atexit import shutil import tempfile import time @@ -19,4 +20,6 @@ def pytest_unconfigure(config): """ called before test process is exited. 
""" + # run exit functions (tmp_path from pytest would complain otherwise) + atexit._run_exitfuncs() shutil.rmtree(TMPDIR, ignore_errors=True) diff --git a/tests/data/example_access_token.dcor-access b/tests/data/example_access_token.dcor-access new file mode 100644 index 0000000000000000000000000000000000000000..e9b3d56be5b697dff499313336d6ec4e3a02843f GIT binary patch literal 2014 zcmZ{lc|6qlAIHBrb2MuRV~ON0v0^n=Mxi8)90`MB7>sd_aZSbwk=D3!)=bE(V-Ptz zFqz4H4|2Ccj4RQaZB*>|^-sIM$38xf*XQ&8{PX%e|N1JZL^ffk}Dck-E9v_ zn|#OUdz!VKj0Ejxm8U)Q=uq3Ij*d+CcN-Q*lNgt_7EZg1b4;u)1rL7zmNl2+Fqh&W zkRF1t{UrkyV<`Xtfn2P8iVzew429M5Kw-a8-1-aZcAuMnQ8aIhf@{C7EXgGxm}FYs z6_`pTFETayi+S3y$?!t6Ctam3TteRpA z$`>_RFC0fts4UsMJFnO3O{5aqX*xKIA6G5@peC2s=o=-orFcqYkIOmb0c@OeB*xIr zPt+6}87y&JB7Nwu$*A^#V_0ej2?NJ1aeelqJnSzc{?0ovr z!9etYSQ#b#gpoSVG4?n3b}y$SN!vRH$zX(dYx~00irqA_mL(gGk0CmW$%$I#xJj~^h{=c{=Cd&;q-~mX+$GOLYhU64*qh>q#X{MKZ>%`k{|J6?qu1;T&j_BAb1G~ z_tcvS-3eWKE3_^?(A;KBnyOj^TTu>wVT^Y@jKow#Ozpt)lX=u?8?BEIQdaLY{;(D` zdxVqxN-8>>0&;i|H-zV_eXd+HAApY0U8&YbYAZdG zi|@CU@e(09f1SAZe#H@cEY9vD)Zb9{LCDKf;TcWs<-%AGF@@i=q5Zph!O?x$Wpp2& zxd_E=R-wYs6fM6hJ`MEl`i$Sxvv#Ipzp+wXtaKPjgwL+UT5T@?32Aenwh-NavIjE+ zE%1CxMRD#=?2T<7cBZ|}88D#TH3kidx1F|yS5n`rT}4`56x~I{`M~bZy?81>q~sVR zL&zENsgitS@ECYm79W3{v;BtOpJm4yD5$@#^?xGu`}+BIL7hFI{e?jp$rCj?mwlGT zqNyV08;jxHxC2LwiYN37oAJLI^wTTPRiQaDM%H8>1xbZhM<(zsA_a9jUbgLpUGB2x z@w_Ha-Z&L*Mq3C_GD$b-%gb_W@jkt-{znYQuV3uGQ>j87d6#L^GK%b!VI*y;Wq}o@ zszaKFpZT$iu88NBHyF)A$#C@x>2(gx5qlxivrZZL;L@E-_ei`?JAI?{hl4|<^gneb z)CT)XD9DHvAvHauu!*S^6)PR^Ijk*s*y`Na#g_}oic;68)kcB!j~P4d3|iC#Ez^4B zj1i42SHW6`6is3Rf^SL0aE6Eu>8GJe(RtIxFFArFu;(Dbev>Zx40Ma(U?$`qu5RhC z)FRDG*z%#4T9(Ey-lfI!lm4)~@i@D27~P9=>@99jjEL&I#sVKTl;HlMC^eB3ru(5h zB+H+?0?Qh)J6kFlgg>^8t(W+y^>M{9Lmd`?)&L`OQq_eIm!*S4ofh1lZz%88#>5SL zU7P<=$5m4(U>)mOE8I7|G9-Z!b0a1_KO_|ymZkc{EYYUQAKQ8sbV{%qrRefiidjS0 za?6*#cQ?l)v_si^@ok5EAND3tHYQMZ zw~wb=L0!V(1;D3mbr z`BSBxOKR0)W{YL1Afzs|GbM~MQhhX6^-4J%Q=s;wa0;?&smZCVy&xOg$VND(y4==k 
zS<~mxY~%yHXMcCY2jWJly{gZ9ID3xOC)u~5X9#}$4l3*6NT1`GT~QW>5_4wru^5Ca zUy?hb8y?L%csVZ5x)c1czPw6++R~h7yxNcfVYzq$xe+N(cretS(iQq}gBe!djKSfr z##kl{+;ICRgWpnenMtt8)1XC>dg23^u2|wEf4U>^Zu77AzWJmzyWURZtYM1 uuqOXEYyQhs-wOXrdEXZ%a0~w-KyKl_0ROL$pKDA2;Nh-E+*4I>|MoYrhmoQH literal 0 HcmV?d00001 diff --git a/tests/test_rtdc_fmt_dcor_access_token.py b/tests/test_rtdc_fmt_dcor_access_token.py new file mode 100644 index 00000000..f46ef35b --- /dev/null +++ b/tests/test_rtdc_fmt_dcor_access_token.py @@ -0,0 +1,40 @@ +import pathlib + +from dclab.rtdc_dataset import fmt_dcor +from dclab.rtdc_dataset.fmt_dcor import access_token + +datapath = pathlib.Path(__file__).parent / "data" + + +def test_get_api_key(): + api_key = access_token.get_api_key( + datapath / "example_access_token.dcor-access", "42") + assert api_key == "7c0c7203-4e25-4b14-a118-553c496a7a52" + + +def test_get_certificate(): + cert = access_token.get_certificate( + datapath / "example_access_token.dcor-access", "42").decode() + assert "yTCCBLGgAwIBAgIUSrQD5LuXBSUtn41PeGDqP9XPbVIwDQYJKoZIhvcNA" in cert + + +def test_get_hostname(): + hostname = access_token.get_hostname( + datapath / "example_access_token.dcor-access", "42") + assert hostname == "dcor.example.com" + + +def test_store_and_get_certificate(tmp_path): + cert = access_token.get_certificate( + datapath / "example_access_token.dcor-access", "42") + expect_path = tmp_path / "dcor.example.com.cert" + expect_path.write_bytes(cert) + fmt_dcor.DCOR_CERTS_SEARCH_PATHS.append(tmp_path) + try: + cert_path = fmt_dcor.get_server_cert_path("dcor.example.com") + except BaseException: + raise + else: + assert str(cert_path) == str(expect_path) + finally: + fmt_dcor.DCOR_CERTS_SEARCH_PATHS.clear()