change InputError to KeyError (#412)

* change InputError to KeyError * removing unused variable * adding link & merging Bart's branch * fixing tests * fixing pre-commit * adding more tests * keep forgetting pre-commit * was better before somehow, let's leave it at 77.8 * use get_time to crop ds, typo in test_forcing * removing tz when cropping ds
eWaterCycle · May 6, 2024 · b437045 · b437045
1 parent 12d3672
commit b437045
Show file tree

Hide file tree

Showing 2 changed files with 100 additions and 57 deletions.
diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py
@@ -4,14 +4,12 @@
 from typing import Type
 
 import fiona
-import numpy as np
 import pandas as pd
 import urllib3
 import xarray as xr
 from cartopy.io import shapereader
 
 from ewatercycle.base.forcing import DefaultForcing
-from ewatercycle.esmvaltool.schema import Dataset
 from ewatercycle.util import get_time
 
 COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
@@ -106,6 +104,45 @@ class CaravanForcing(DefaultForcing):
     https://gist.github.com/Daafip/ac1b030eb5563a76f4d02175f2716fd7
     """
 
+    @classmethod
+    def get_dataset(cls: Type["CaravanForcing"], dataset: str) -> xr.Dataset:
+        """Opens specified dataset from data.4tu.nl OPeNDAP server.
+
+        Args:
+            dataset (str): name of dataset, choose from:
+                'camels',
+                'camelsaus',
+                'camelsbr',
+                'camelscl',
+                'camelsgb',
+                'hysets',
+                'lamah'
+        """
+        return xr.open_dataset(f"{OPENDAP_URL}{dataset}.nc")
+
+    @classmethod
+    def get_basin_id(cls: Type["CaravanForcing"], dataset: str) -> list[str]:
+        """Gets a list of all the basin ids in provided dataset
+        Args:
+            dataset (str): name of dataset, choose from:
+                'camels',
+                'camelsaus',
+                'camelsbr',
+                'camelscl',
+                'camelsgb',
+                'hysets',
+                'lamah'
+
+        Note:
+            https://www.ewatercycle.org/caravan-map/ hosts a set of interactive
+            maps which allow exploration of the available catchments and which
+            also list the needed basin_ids.
+            Alternatively, a zip with shapefiles is available at
+            https://doi.org/10.4121/ca13056c-c347-4a27-b320-930c2a4dd207.v1 which also
+            allows exploration of the dataset.
+        """
+        return [val.decode() for val in cls.get_dataset(dataset).basin_id.values]
+
     @classmethod
     def generate(  # type: ignore[override]
         cls: Type["CaravanForcing"],
@@ -114,7 +151,6 @@ def generate(  # type: ignore[override]
         directory: str,
         variables: tuple[str, ...] = (),
         shape: str | Path | None = None,
-        dataset: str | Dataset | dict = "unused",
         **kwargs,
     ) -> "CaravanForcing":
         """Retrieve caravan for a model.
@@ -130,28 +166,32 @@ def generate(  # type: ignore[override]
                 if not specified will default to all.
             shape: (Optional) Path to a shape file.
                 If none is specified, will be downloaded automatically.
-            dataset: Unused
-
-            **kwargs:
-                basin_id: str containing the wanted basin_id. Data sets can be explored
-                using `CaravanForcing.get_dataset` or `CaravanForcing.get_basin_id`
-                More explanation in the example notebook mentioned above.
 
+        Kwargs:
+            basin_id: The ID of the desired basin. Data sets can be explored using
+                `CaravanForcing.get_dataset(dataset_name)` or
+                `CaravanForcing.get_basin_id(dataset_name)` where `dataset_name` is the
+                name of a dataset in Caravan (for example, "camels" or "camelsgb").
+                For more information do `help(CaravanForcing.get_basin_id)` or see
+                https://www.ewatercycle.org/caravan-map/.
         """
         if "basin_id" not in kwargs:
-            msg = "You have to specify a basin ID to be able to generate forcing from Caravan."
-            raise InputError(msg)
-        basin_id = kwargs["basin_id"]
+            msg = (
+                "You have to specify a basin ID to be able to generate forcing from"
+                " Caravan."
+            )
+            raise ValueError(msg)
+        basin_id = str(kwargs["basin_id"])
 
-        dataset = basin_id.split("_")[0]
-        ds = get_dataset(dataset)
+        dataset: str = basin_id.split("_")[0]
+        ds = cls.get_dataset(dataset)
         ds_basin = ds.sel(basin_id=basin_id.encode())
         ds_basin_time = crop_ds(ds_basin, start_time, end_time)
 
         if shape is None:
             shape = get_shapefiles(Path(directory), basin_id)
 
-        if variables == ():
+        if len(variables) == 0:
             variables = ds_basin_time.data_vars.keys()
 
         # only return the properties which are also in property vars
@@ -195,53 +235,16 @@ def generate(  # type: ignore[override]
         return forcing
 
 
-def get_dataset(dataset) -> xr.Dataset:
-    """Opens specified dataset from data.4tu.nl OPeNDAP server.
-        Args:
-            dataset (str): name of dataset, choose from:
-                'camels',
-                'camelsaus',
-                'camelsbr',
-                'camelscl',
-                'camelsgb',
-                'hysets',
-                'lamah'
-    """
-    return xr.open_dataset(f"{OPENDAP_URL}{dataset}.nc")
-
-
-def get_basin_id(dataset) -> list[str]:
-    """Gets a list of all the basin ids in provided dataset
-    Args:
-        dataset (str): name of dataset, choose from:
-            'camels',
-            'camelsaus',
-            'camelsbr',
-            'camelscl',
-            'camelsgb',
-            'hysets',
-            'lamah'
-
-    Note:
-        a zip with shapefiles is available at
-        https://doi.org/10.4121/ca13056c-c347-4a27-b320-930c2a4dd207.v1 which also
-        allows exploration of the dataset. 
-    """
-    return [val.decode() for val in get_dataset(dataset).basin_id.values]
-
-
-
 def get_shapefiles(directory: Path, basin_id: str) -> Path:
     """Retrieve shapefiles from data 4TU.nl ."""
     zip_path = directory / "shapefiles.zip"
     output_path = directory / "shapefiles"
     shape_path = directory / f"{basin_id}.shp"
+    combined_shapefile_path = output_path / "combined.shp"
 
     if not shape_path.is_file():
-        combined_shapefile_path = output_path / "combined.shp"
         if not combined_shapefile_path.is_file():
-            timeout = urllib3.Timeout(connect=10.0, read=300)
-            http = urllib3.PoolManager(timeout=timeout)
+            http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=10.0, read=300))
             with http.request(
                 "GET", SHAPEFILE_URL, preload_content=False
             ) as r, zip_path.open("wb") as out_file:
@@ -300,8 +303,8 @@ def extract_basin_shapefile(
 
 def crop_ds(ds: xr.Dataset, start_time: str, end_time: str) -> xr.Dataset:
     """Crops dataset based on time."""
-    get_time(start_time), get_time(end_time)  # if utc, remove Z to parse to np.dt64
-    start, end = np.datetime64(start_time[:-1]), np.datetime64(end_time[:-1])
+    start = pd.Timestamp(get_time(start_time)).tz_convert(None)
+    end = pd.Timestamp(get_time(end_time)).tz_convert(None)
     return ds.isel(
         time=(ds["time"].to_numpy() >= start) & (ds["time"].to_numpy() <= end)
     )
diff --git a/tests/src/base/test_forcing.py b/tests/src/base/test_forcing.py
@@ -1,3 +1,4 @@
+import unittest
 from pathlib import Path
 from shutil import copytree
 from unittest import mock
@@ -281,7 +282,9 @@ def recipe_output_cls(cls, *args, **kwargs):
 
 @pytest.fixture
 def mock_retrieve():
-    with mock.patch("ewatercycle._forcings.caravan.get_dataset") as mock_class:
+    with mock.patch(
+        "ewatercycle._forcings.caravan.CaravanForcing.get_dataset"
+    ) as mock_class:
         test_file = Path(__file__).parent / "forcing_files" / "test_caravan_file.nc"
         mock_class.return_value = xr.open_dataset(test_file)
         yield mock_class
@@ -331,6 +334,43 @@ def test_retrieve_caravan_forcing(tmp_path: Path, mock_retrieve: mock.MagicMock)
     mock_retrieve.assert_called_once_with(basin_id.split("_")[0])
 
 
+def test_retrieve_caravan_forcing_empty_vars(
+    tmp_path: Path, mock_retrieve: mock.MagicMock
+):
+    basin_id = "camels_03439000"
+    test_files_dir = Path(__file__).parent / "forcing_files"
+    tmp_camels_dir = tmp_path / "camels"
+    copytree(test_files_dir, tmp_camels_dir)
+    caravan_forcing = CaravanForcing.generate(
+        start_time="1981-01-01T00:00:00Z",
+        end_time="1981-03-01T00:00:00Z",
+        directory=str(tmp_camels_dir),
+        basin_id=basin_id,
+    )
+    caravan_forcing.save()
+    ds = caravan_forcing.to_xarray()
+    content = list(ds.data_vars.keys())
+    expected = ["Q", "evspsblpot", "pr", "tas", "tasmax", "tasmin"]
+    assert content == expected
+    mock_retrieve.assert_called_once_with(basin_id.split("_")[0])
+
+
+def test_retrieve_caravan_forcing_no_basin_id(
+    tmp_path: Path, mock_retrieve: mock.MagicMock
+):
+    test_files_dir = Path(__file__).parent / "forcing_files"
+    tmp_camels_dir = tmp_path / "camels"
+    copytree(test_files_dir, tmp_camels_dir)
+
+    msg = "You have to specify a basin ID to be able to generate forcing from Caravan."
+    with pytest.raises(ValueError, match=msg):
+        CaravanForcing.generate(
+            start_time="1981-01-01T00:00:00Z",
+            end_time="1981-03-01T00:00:00Z",
+            directory=str(tmp_camels_dir),
+        )
+
+
 def test_extract_basin_shapefile(tmp_path: Path):
     basin_id = "camels_01022500"
     test_files_dir = Path(__file__).parent / "forcing_files"