Skip to content

Commit

Permalink
change InputError to KeyError (#412)
Browse files Browse the repository at this point in the history
* change InputError to KeyError

* removing unused variable

* adding link & merging barts' branch

* fixing tests

* fixing pre-commit

* adding more tests

* keep forgetting pre-commit

* was better before somehow, let's leave it at 77.8

* use get_time to crop ds, typo in test_forcing

* removing tz when cropping ds
  • Loading branch information
Daafip committed May 6, 2024
1 parent 12d3672 commit b437045
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 57 deletions.
115 changes: 59 additions & 56 deletions src/ewatercycle/_forcings/caravan.py
Expand Up @@ -4,14 +4,12 @@
from typing import Type

import fiona
import numpy as np
import pandas as pd
import urllib3
import xarray as xr
from cartopy.io import shapereader

from ewatercycle.base.forcing import DefaultForcing
from ewatercycle.esmvaltool.schema import Dataset
from ewatercycle.util import get_time

COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
Expand Down Expand Up @@ -106,6 +104,45 @@ class CaravanForcing(DefaultForcing):
https://gist.github.com/Daafip/ac1b030eb5563a76f4d02175f2716fd7
"""

@classmethod
def get_dataset(cls: Type["CaravanForcing"], dataset: str) -> xr.Dataset:
    """Open one Caravan dataset from the data.4tu.nl OPeNDAP server.

    Args:
        dataset (str): name of dataset, choose from:
            'camels',
            'camelsaus',
            'camelsbr',
            'camelscl',
            'camelsgb',
            'hysets',
            'lamah'

    Returns:
        The dataset opened by ``xr.open_dataset`` from the URL built out of
        ``OPENDAP_URL``, the dataset name and the ``.nc`` suffix.
    """
    url = f"{OPENDAP_URL}{dataset}.nc"
    return xr.open_dataset(url)

@classmethod
def get_basin_id(cls: Type["CaravanForcing"], dataset: str) -> list[str]:
    """Return every basin id contained in the given dataset.

    Args:
        dataset (str): name of dataset, choose from:
            'camels',
            'camelsaus',
            'camelsbr',
            'camelscl',
            'camelsgb',
            'hysets',
            'lamah'

    Returns:
        The ``basin_id`` values of the dataset, each decoded to ``str``.

    Note:
        https://www.ewatercycle.org/caravan-map/ hosts a set of interactive
        maps for exploring the available catchments; the maps also show the
        needed basin_ids.
        Alternatively, a zip with shapefiles is available at
        https://doi.org/10.4121/ca13056c-c347-4a27-b320-930c2a4dd207.v1 which
        also allows exploration of the dataset.
    """
    # The ids are decoded one by one, so each element must offer ``.decode()``.
    raw_ids = cls.get_dataset(dataset).basin_id.values
    return [raw_id.decode() for raw_id in raw_ids]

@classmethod
def generate( # type: ignore[override]
cls: Type["CaravanForcing"],
Expand All @@ -114,7 +151,6 @@ def generate( # type: ignore[override]
directory: str,
variables: tuple[str, ...] = (),
shape: str | Path | None = None,
dataset: str | Dataset | dict = "unused",
**kwargs,
) -> "CaravanForcing":
"""Retrieve caravan for a model.
Expand All @@ -130,28 +166,32 @@ def generate( # type: ignore[override]
if not specified will default to all.
shape: (Optional) Path to a shape file.
If none is specified, will be downloaded automatically.
dataset: Unused
**kwargs:
basin_id: str containing the wanted basin_id. Data sets can be explored
using `CaravanForcing.get_dataset` or `CaravanForcing.get_basin_id`
More explanation in the example notebook mentioned above.
Kwargs:
basin_id: The ID of the desired basin. Data sets can be explored using
`CaravanForcing.get_dataset(dataset_name)` or
`CaravanForcing.get_basin_id(dataset_name)` where `dataset_name` is the
name of a dataset in Caravan (for example, "camels" or "camelsgb").
For more information do `help(CaravanForcing.get_basin_id)` or see
https://www.ewatercycle.org/caravan-map/.
"""
if "basin_id" not in kwargs:
msg = "You have to specify a basin ID to be able to generate forcing from Caravan."
raise InputError(msg)
basin_id = kwargs["basin_id"]
msg = (
"You have to specify a basin ID to be able to generate forcing from"
" Caravan."
)
raise ValueError(msg)
basin_id = str(kwargs["basin_id"])

dataset = basin_id.split("_")[0]
ds = get_dataset(dataset)
dataset: str = basin_id.split("_")[0]
ds = cls.get_dataset(dataset)
ds_basin = ds.sel(basin_id=basin_id.encode())
ds_basin_time = crop_ds(ds_basin, start_time, end_time)

if shape is None:
shape = get_shapefiles(Path(directory), basin_id)

if variables == ():
if len(variables) == 0:
variables = ds_basin_time.data_vars.keys()

# only return the properties which are also in property vars
Expand Down Expand Up @@ -195,53 +235,16 @@ def generate( # type: ignore[override]
return forcing


def get_dataset(dataset) -> xr.Dataset:
    """Open one Caravan dataset from the data.4tu.nl OPeNDAP server.

    Args:
        dataset (str): name of dataset, choose from:
            'camels',
            'camelsaus',
            'camelsbr',
            'camelscl',
            'camelsgb',
            'hysets',
            'lamah'

    Returns:
        The dataset opened by ``xr.open_dataset`` from the URL built out of
        ``OPENDAP_URL``, the dataset name and the ``.nc`` suffix.
    """
    url = f"{OPENDAP_URL}{dataset}.nc"
    return xr.open_dataset(url)


def get_basin_id(dataset) -> list[str]:
    """Return every basin id contained in the given dataset.

    Args:
        dataset (str): name of dataset, choose from:
            'camels',
            'camelsaus',
            'camelsbr',
            'camelscl',
            'camelsgb',
            'hysets',
            'lamah'

    Returns:
        The ``basin_id`` values of the dataset, each decoded to ``str``.

    Note:
        A zip with shapefiles is available at
        https://doi.org/10.4121/ca13056c-c347-4a27-b320-930c2a4dd207.v1 which
        also allows exploration of the dataset.
    """
    # The ids are decoded one by one, so each element must offer ``.decode()``.
    raw_ids = get_dataset(dataset).basin_id.values
    return [raw_id.decode() for raw_id in raw_ids]



def get_shapefiles(directory: Path, basin_id: str) -> Path:
"""Retrieve shapefiles from data 4TU.nl ."""
zip_path = directory / "shapefiles.zip"
output_path = directory / "shapefiles"
shape_path = directory / f"{basin_id}.shp"
combined_shapefile_path = output_path / "combined.shp"

if not shape_path.is_file():
combined_shapefile_path = output_path / "combined.shp"
if not combined_shapefile_path.is_file():
timeout = urllib3.Timeout(connect=10.0, read=300)
http = urllib3.PoolManager(timeout=timeout)
http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=10.0, read=300))
with http.request(
"GET", SHAPEFILE_URL, preload_content=False
) as r, zip_path.open("wb") as out_file:
Expand Down Expand Up @@ -300,8 +303,8 @@ def extract_basin_shapefile(

def crop_ds(ds: xr.Dataset, start_time: str, end_time: str) -> xr.Dataset:
    """Crop a dataset to the inclusive time window [start_time, end_time].

    Args:
        ds: Dataset exposing a ``time`` variable/coordinate.
        start_time: Start of the window; parsed with ``get_time``.
        end_time: End of the window; parsed with ``get_time``.

    Returns:
        The subset of ``ds`` whose ``time`` values lie inside the window,
        both bounds included.
    """
    # Parse each bound once through get_time instead of slicing the trailing
    # character off the raw string. ``tz_convert(None)`` drops the timezone
    # while keeping the UTC wall time.
    # NOTE(review): assumes get_time returns timezone-aware datetimes and that
    # ds["time"] is timezone-naive -- confirm.
    start = pd.Timestamp(get_time(start_time)).tz_convert(None)
    end = pd.Timestamp(get_time(end_time)).tz_convert(None)

    # Materialise the time axis once and reuse it for both comparisons.
    times = ds["time"].to_numpy()
    return ds.isel(time=(times >= start) & (times <= end))
42 changes: 41 additions & 1 deletion tests/src/base/test_forcing.py
@@ -1,3 +1,4 @@
import unittest
from pathlib import Path
from shutil import copytree
from unittest import mock
Expand Down Expand Up @@ -281,7 +282,9 @@ def recipe_output_cls(cls, *args, **kwargs):

@pytest.fixture
def mock_retrieve():
    """Patch ``CaravanForcing.get_dataset`` to serve the local test dataset.

    The patch targets the classmethod because ``generate`` looks the dataset
    up through ``cls.get_dataset``; patching a module-level name would not
    intercept that call.

    Yields:
        The mock replacing ``CaravanForcing.get_dataset``, so tests can assert
        how the dataset was requested.
    """
    test_file = Path(__file__).parent / "forcing_files" / "test_caravan_file.nc"
    with mock.patch(
        "ewatercycle._forcings.caravan.CaravanForcing.get_dataset"
    ) as mock_class:
        mock_class.return_value = xr.open_dataset(test_file)
        yield mock_class
Expand Down Expand Up @@ -331,6 +334,43 @@ def test_retrieve_caravan_forcing(tmp_path: Path, mock_retrieve: mock.MagicMock)
mock_retrieve.assert_called_once_with(basin_id.split("_")[0])


def test_retrieve_caravan_forcing_empty_vars(
    tmp_path: Path, mock_retrieve: mock.MagicMock
):
    """Generating without ``variables`` yields every expected data variable."""
    basin_id = "camels_03439000"

    # Work on a private copy of the fixture files so the test can write freely.
    source_dir = Path(__file__).parent / "forcing_files"
    work_dir = tmp_path / "camels"
    copytree(source_dir, work_dir)

    forcing = CaravanForcing.generate(
        start_time="1981-01-01T00:00:00Z",
        end_time="1981-03-01T00:00:00Z",
        directory=str(work_dir),
        basin_id=basin_id,
    )
    forcing.save()

    found = list(forcing.to_xarray().data_vars)
    assert found == ["Q", "evspsblpot", "pr", "tas", "tasmax", "tasmin"]

    # The dataset name is the part of the basin id before the first underscore.
    mock_retrieve.assert_called_once_with(basin_id.split("_")[0])


def test_retrieve_caravan_forcing_no_basin_id(
    tmp_path: Path, mock_retrieve: mock.MagicMock
):
    """Calling ``generate`` without ``basin_id`` raises ``ValueError``."""
    source_dir = Path(__file__).parent / "forcing_files"
    work_dir = tmp_path / "camels"
    copytree(source_dir, work_dir)

    expected_message = (
        "You have to specify a basin ID to be able to generate forcing from Caravan."
    )
    with pytest.raises(ValueError, match=expected_message):
        CaravanForcing.generate(
            start_time="1981-01-01T00:00:00Z",
            end_time="1981-03-01T00:00:00Z",
            directory=str(work_dir),
        )


def test_extract_basin_shapefile(tmp_path: Path):
basin_id = "camels_01022500"
test_files_dir = Path(__file__).parent / "forcing_files"
Expand Down

0 comments on commit b437045

Please sign in to comment.