Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PointWiseDownscaler BCSD models failing on missing _check_n_features attribute #54

Open
dgergel opened this issue Dec 3, 2020 · 0 comments
Open

Comments

@dgergel
Copy link
Contributor

dgergel commented Dec 3, 2020

Currently the PointWiseDownscaler BCSD temperature and precipitation models are failing because the BCSD temperature and precipitation objects lose their _check_n_features attribute when applied to a DataArray/Dataset through the PointWiseDownscaler wrapper. The supporting stack trace:

<timed exec> in <module>
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataarray.py in load(self, **kwargs)
    806         dask.array.compute
    807         """
--> 808         ds = self._to_temp_dataset().load(**kwargs)
    809         new = self._from_temp_dataset(ds)
    810         self._variable = new._variable
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
    652 
    653             # evaluate all the dask arrays simultaneously
--> 654             evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    655 
    656             for k, data in zip(lazy_data, evaluated_data):
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    434     keys = [x.__dask_keys__() for x in collections]
    435     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 436     results = schedule(dsk, keys, **kwargs)
    437     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    438 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   2570                     should_rejoin = False
   2571             try:
-> 2572                 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   2573             finally:
   2574                 for f in futures.values():
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
   1870                 direct=direct,
   1871                 local_worker=local_worker,
-> 1872                 asynchronous=asynchronous,
   1873             )
   1874 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    765         else:
    766             return sync(
--> 767                 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    768             )
    769 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    332     if error[0]:
    333         typ, exc, tb = error[0]
--> 334         raise exc.with_traceback(tb)
    335     else:
    336         return result[0]
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in f()
    316             if callback_timeout is not None:
    317                 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 318             result[0] = yield future
    319         except Exception as exc:
    320             error[0] = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/tornado/gen.py in run(self)
    760 
    761                     try:
--> 762                         value = future.result()
    763                     except Exception:
    764                         exc_info = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1726                             exc = CancelledError(key)
   1727                         else:
-> 1728                             raise exception.with_traceback(traceback)
   1729                         raise exc
   1730                     if errors == "skip":
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/parallel.py in _wrapper()
    282         ]
    283 
--> 284         result = func(*converted_args, **kwargs)
    285 
    286         # check all dims are present
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/core.py in _fit_wrapper()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/bcsd.py in fit()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/base.py in _validate_data()
AttributeError: 'BcsdTemperature' object has no attribute '_check_n_features'

This is also evident when using the dir() function to compare the attributes of a bare BCSD object with those of the same object wrapped by PointWiseDownscaler. For example:

from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = BcsdTemperature(return_anoms=False)
dir(model)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_X_y', '_check_array', '_check_n_features', '_create_groups', '_fit_attributes', '_get_param_names', '_get_tags', '_more_tags', '_pre_fit', '_qm_fit_by_group', '_qm_transform_by_group', '_remove_climatology', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_timestep', '_validate_data', 'climate_trend', 'climate_trend_grouper', 'fit', 'get_params', 'predict', 'qm_kwargs', 'return_anoms', 'set_params', 'time_grouper']

versus

from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = PointWiseDownscaler(BcsdTemperature(return_anoms=False))
dir(model) 
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_dim', '_model', '_models', '_to_feature_x', 'fit', 'predict', 'transform']

It appears that this comes directly from how we pass the BCSD object around in the _fit_wrapper function, e.g. here: https://github.com/jhamman/scikit-downscale/blob/master/skdownscale/pointwise_models/core.py#L68. This should be pretty straightforward to fix (it looks like copy.deepcopy just isn't carrying over the methods), but I thought it was worth having a design-considerations conversation before opening a PR with a fix.

cc @jhamman

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant