From 0cd14a564cbc46b33e7c12e9a254c3b287dd3993 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 22 Apr 2020 12:21:38 +0200 Subject: [PATCH 001/342] Silence sphinx warnings (#3990) * generate documentation pages for the idxmin / idxmax methods * fix a few links * convert the mention of coarsen to double backtick quoted and add rolling --- doc/api-hidden.rst | 4 ++++ doc/whats-new.rst | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cc9517a98ba..313428c29d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -18,6 +18,8 @@ Dataset.any Dataset.argmax Dataset.argmin + Dataset.idxmax + Dataset.idxmin Dataset.max Dataset.min Dataset.mean @@ -160,6 +162,8 @@ DataArray.any DataArray.argmax DataArray.argmin + DataArray.idxmax + DataArray.idxmin DataArray.max DataArray.min DataArray.mean diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 46319730d21..8b15e57873b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -64,7 +64,7 @@ Bug fixes sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` - can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) + can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ - Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). By `Deepak Cherian `_ @@ -200,13 +200,13 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. -- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)`` +- ``coarsen`` and ``rolling`` now respect ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - Delete associated indexes when deleting coordinate variables. (:issue:`3746`). By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` +- Fix :py:meth:`Dataset.to_zarr` when using ``append_dim`` and ``group`` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). By `Maximilian Roos `_. @@ -244,7 +244,7 @@ Internal Changes By `Maximilian Roos `_ - Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to ``pandas.Panel``, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ From e1f0f987c76eb170f5b7ca26c1153c4e34760f0e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 22 Apr 2020 19:27:58 +0000 Subject: [PATCH 002/342] Better chunking error messages for zarr backend (#3983) --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 50 ++++++++++++++++++++--------------- xarray/tests/test_backends.py | 22 ++++++++++++--- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8b15e57873b..fc95e26dabd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -105,6 +105,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Raise more informative error messages for chunk size conflicts when writing to zarr files. + By `Deepak Cherian `_. 
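A minimal sketch of how the improved error surfaces, mirroring the ``test_chunk_encoding_with_dask`` case added further below; the dataset and the commented ``to_zarr`` call are illustrative assumptions, not part of the patch, and need ``dask`` and ``zarr`` installed::

    import numpy as np
    import xarray as xr

    # hypothetical variable 'var1': its dask chunks of 4 conflict with the
    # zarr chunks requested through encoding
    ds = xr.Dataset({"var1": ("x", np.arange(12.0))}).chunk({"x": 4})
    ds["var1"].encoding["chunks"] = (6,)

    # with this patch the message names the variable and both chunkings, e.g.
    # ds.to_zarr("example.zarr")
    # NotImplementedError: ... for variable named 'var1' would overlap multiple dask chunks ...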
- Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c262dae2811..973c167911e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -65,7 +65,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): """ Given encoding chunks (possibly None) and variable chunks (possibly None) """ @@ -88,15 +88,16 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk." - " Variable dask chunks %r are incompatible. Consider " - "rechunking using `chunk()`." % (var_chunks,) + "Zarr requires uniform chunk sizes except for final chunk. " + f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " + "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( "Final chunk of Zarr array must be the same size or smaller " - "than the first. Variable Dask chunks %r are incompatible. " - "Consider rechunking using `chunk()`." % var_chunks + f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -114,13 +115,15 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim) + return _determine_zarr_chunks(None, var_chunks, ndim, name) for x in enc_chunks_tuple: if not isinstance(x, int): raise TypeError( - "zarr chunks must be an int or a tuple of ints. " - "Instead found %r" % (enc_chunks_tuple,) + "zarr chunk sizes specified in `encoding['chunks']` " + "must be an int or a tuple of ints. " + f"Instead found encoding['chunks']={enc_chunks_tuple!r} " + f"for variable named {name!r}." ) # if there are chunks in encoding and the variable data is a numpy array, @@ -142,19 +145,22 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( - "Specified zarr chunks %r would overlap multiple dask " - "chunks %r. This is not implemented in xarray yet. " - " Consider rechunking the data using " - "`chunk()` or specifying different chunks in encoding." - % (enc_chunks_tuple, var_chunks) + f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + "This is not implemented in xarray yet. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) if dchunks[-1] > zchunk: raise ValueError( "Final chunk of Zarr array must be the same size or " - "smaller than the first. The specified Zarr chunk " - "encoding is %r, but %r in variable Dask chunks %r is " - "incompatible. Consider rechunking using `chunk()`." - % (enc_chunks_tuple, dchunks, var_chunks) + "smaller than the first. 
" + f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " + f"for variable named {name!r} " + f"but {dchunks} in the variable's Dask chunks {var_chunks} is " + "incompatible with this encoding. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) return enc_chunks_tuple @@ -177,7 +183,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): return dimensions, attributes -def extract_zarr_variable_encoding(variable, raise_on_invalid=False): +def extract_zarr_variable_encoding(variable, raise_on_invalid=False, name=None): """ Extract zarr encoding dictionary from xarray Variable @@ -207,7 +213,7 @@ def extract_zarr_variable_encoding(variable, raise_on_invalid=False): del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim + encoding.get("chunks"), variable.chunks, variable.ndim, name ) encoding["chunks"] = chunks return encoding @@ -453,7 +459,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No writer.add(v.data, zarr_array, region=tuple(new_region)) else: # new variable - encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding( + v, raise_on_invalid=check, name=vn + ) encoded_attrs = {} # the magic for storing the hidden dimension data encoded_attrs[DIMENSION_KEY] = dims diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3fde292c04f..916c29ba7bd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1685,11 +1685,27 @@ def test_chunk_encoding_with_dask(self): # should fail if dask_chunks are irregular... ds_chunk_irreg = ds.chunk({"x": (5, 4, 3)}) - with pytest.raises(ValueError) as e_info: + with raises_regex(ValueError, "uniform chunk sizes."): with self.roundtrip(ds_chunk_irreg) as actual: pass - # make sure this error message is correct and not some other error - assert e_info.match("chunks") + + # should fail if encoding["chunks"] clashes with dask_chunks + badenc = ds.chunk({"x": 4}) + badenc.var1.encoding["chunks"] = (6,) + with raises_regex(NotImplementedError, "named 'var1' would overlap"): + with self.roundtrip(badenc) as actual: + pass + + badenc.var1.encoding["chunks"] = (2,) + with raises_regex(ValueError, "Specified Zarr chunk encoding"): + with self.roundtrip(badenc) as actual: + pass + + badenc = badenc.chunk({"x": (3, 3, 6)}) + badenc.var1.encoding["chunks"] = (3,) + with raises_regex(ValueError, "incompatible with this encoding"): + with self.roundtrip(badenc) as actual: + pass # ... 
except if the last chunk is smaller than the first ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) From c788ee44008cdd65c8b6de40c737f1b28e173496 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Thu, 23 Apr 2020 03:58:09 -0400 Subject: [PATCH 003/342] DOC: add pandas.DataFrame.to_xarray (#3994) Co-authored-by: Ray Bell --- xarray/core/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d811d54847f..53aa00f22ce 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4598,6 +4598,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, From 37551da5ebc7861439ac3eefddefb534b76f2895 Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Fri, 24 Apr 2020 12:44:54 +0530 Subject: [PATCH 004/342] Fix some code quality and bug-risk issues (#3999) --- .deepsource.toml | 18 ++++++++++++++++++ xarray/convert.py | 6 +++--- xarray/core/computation.py | 4 ++-- xarray/core/formatting.py | 12 +++++------- xarray/core/groupby.py | 4 +++- xarray/plot/plot.py | 2 +- 6 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6cf4178b5bf..a3723ea9db9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1192,10 +1192,10 @@ def dot(*arrays, dims=None, **kwargs): # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." + "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." 
+ "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..d6732fc182e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -562,7 +560,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..148e16863d1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -273,7 +273,7 @@ def __init__( grouper=None, bins=None, restore_coord_dims=None, - cut_kwargs={}, + cut_kwargs=None, ): """Create a GroupBy object @@ -299,6 +299,8 @@ def __init__( Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..4d6033bf00d 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -30,7 +30,7 @@ def _infer_line_data(darray, x, y, hue): error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) + ", ".join(repr(dd) for dd in darray.dims) ) ndims = len(darray.dims) From 6ca3bd7148748fbf03d3ede653a83287f852e472 Mon Sep 17 00:00:00 2001 From: Huite Date: Fri, 24 Apr 2020 09:15:43 +0200 Subject: [PATCH 005/342] full_like: error on non-scalar fill_value (#3979) * Avoid multiplication DeprecationWarning in rasterio backend * full_like: error on non-scalar fill_value Fixes #3977 * Added test * Updated what's new * core.utils.is_scalar instead of numpy.is_scalar * More informative error message * raises_regex for error test --- doc/whats-new.rst | 2 ++ xarray/core/common.py | 5 ++++- xarray/tests/test_variable.py | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fc95e26dabd..7b2b3530c41 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features Bug fixes ~~~~~~~~~ +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in diff --git a/xarray/core/common.py b/xarray/core/common.py index 8f6d57e9f12..1e7069ec51f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... 
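A quick illustration of the check this patch introduces, ahead of the ``full_like`` hunk below; the array here is made up and not part of the diff::

    import numpy as np
    import xarray as xr

    orig = xr.DataArray(np.arange(4.0), dims="x")
    xr.full_like(orig, 1.0)           # scalar fill_value keeps working
    # xr.full_like(orig, [1.0, 2.0])  # now raises ValueError: fill_value must be scalar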
@@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78e3848b8fb..3003e0d66f3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2213,6 +2213,10 @@ def test_full_like(self): assert expect.dtype == bool assert_identical(expect, full_like(orig, True, dtype=bool)) + # raise error on non-scalar fill_value + with raises_regex(ValueError, "must be scalar"): + full_like(orig, [1.0, 2.0]) + @requires_dask def test_full_like_dask(self): orig = Variable( From 33a66d6380c26a59923922ee11e8ffcf0b4f379f Mon Sep 17 00:00:00 2001 From: Ryan May Date: Fri, 24 Apr 2020 01:16:09 -0600 Subject: [PATCH 006/342] Fix handling of abbreviated units like msec (#3998) * Fix handling of abbreviated units like msec By default, xarray tries to decode times with pandas and falls back to cftime. This fixes the exception handler to fallback properly in the cases an unhandled abbreviated unit is passed in. * Add what's new entry --- doc/whats-new.rst | 4 +++- xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7b2b3530c41..6fc3260f10d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -82,7 +82,9 @@ Bug fixes - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. - +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. 
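A minimal sketch of the decoding this entry describes, reusing the value and units from the ``test_decode_abbreviation`` test added below; it is an illustration only and assumes ``cftime`` is installed::

    import numpy as np
    from xarray.coding import times

    val = np.array([1586628000000.0])
    units = "msecs since 1970-01-01T00:00:00Z"
    # pandas cannot parse the abbreviated 'msecs' unit, so decoding now falls
    # back to cftime instead of raising
    dates = times.decode_cf_datetime(val, units)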
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..d923f1ad088 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -155,7 +155,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( flat_num_dates.astype(np.float), units, calendar ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 00c34940ce4..1efd4b02bf8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -432,6 +432,18 @@ def test_decode_360_day_calendar(): assert_array_equal(actual, expected) +@requires_cftime +def test_decode_abbreviation(): + """Test making sure we properly fall back to cftime on abbreviated units.""" + import cftime + + val = np.array([1586628000000.0]) + units = "msecs since 1970-01-01T00:00:00Z" + actual = coding.times.decode_cf_datetime(val, units) + expected = coding.times.cftime_to_nptime(cftime.num2date(val, units)) + assert_array_equal(actual, expected) + + @arm_xfail @requires_cftime @pytest.mark.parametrize( From 4e196f74dccabbc82f43df7806dc0c7810ba526a Mon Sep 17 00:00:00 2001 From: arabidopsis Date: Wed, 29 Apr 2020 23:54:22 +0800 Subject: [PATCH 007/342] ensure Variable._repr_html_ works (#3973) * ensure Variable._repr_html_ works * added PR 3972 to Bug fixes * better attribute access * moved Varible._repr_html_ test to better location Co-authored-by: Stephan Hoyer Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 +++ xarray/core/formatting_html.py | 3 ++- xarray/tests/test_formatting_html.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6fc3260f10d..b71e0baa655 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,9 @@ Bug fixes By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. 
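The fix described above can be exercised with a short sketch that mirrors the ``test_variable_repr_html`` test added in this commit::

    import xarray as xr

    v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
    with xr.set_options(display_style="html"):
        html = v._repr_html_()  # previously this could raise AttributeError
    assert "xarray.Variable" in html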
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8678a58b381..6e345582ed0 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -183,7 +183,8 @@ def array_section(obj): # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) collapsed = "" - preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + variable = getattr(obj, "variable", obj) + preview = escape(inline_variable_array_repr(variable, max_width=70)) data_repr = short_data_repr_html(obj) data_icon = _icon("icon-database") diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 239f339208d..94653016416 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -137,3 +137,15 @@ def test_repr_of_dataset(dataset): ) assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted + + +def test_variable_repr_html(): + v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) + assert hasattr(v, "_repr_html_") + with xr.set_options(display_style="html"): + html = v._repr_html_().strip() + # We don't do a complete string identity since + # html output is probably subject to change, is long and... reasons. + # Just test that something reasonable was produced. + assert html.startswith("") + assert "xarray.Variable" in html From 8834afa9f617bd201eba00374bb55d96dccec96b Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 29 Apr 2020 18:10:09 +0200 Subject: [PATCH 008/342] Apply blackdoc to the documentation (#4012) * replace tabs with spaces * fix some invalid code * add missing prompts * apply blackdoc * reformat the plotting docs code * whats-new.rst entry --- doc/combining.rst | 62 ++++++------ doc/computation.rst | 165 +++++++++++++++++--------------- doc/contributing.rst | 28 +++--- doc/dask.rst | 66 ++++++++----- doc/data-structures.rst | 144 +++++++++++++++------------- doc/faq.rst | 9 +- doc/groupby.rst | 56 ++++++----- doc/indexing.rst | 99 +++++++++---------- doc/internals.rst | 21 +++-- doc/interpolation.rst | 118 ++++++++++++----------- doc/io.rst | 116 ++++++++++++++--------- doc/pandas.rst | 36 ++++--- doc/plotting.rst | 151 ++++++++++++++++------------- doc/quick-overview.rst | 39 ++++---- doc/reshaping.rst | 112 +++++++++++----------- doc/time-series.rst | 54 ++++++----- doc/weather-climate.rst | 72 +++++++------- doc/whats-new.rst | 199 ++++++++++++++++++++------------------- xarray/core/common.py | 4 +- xarray/core/dataarray.py | 55 ++++++----- xarray/core/dataset.py | 66 +++++++------ 21 files changed, 917 insertions(+), 755 deletions(-) diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. 
ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. 
ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. 
ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. 
ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. 
ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises an ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can be used also for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. 
ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..51dba2bb0cc 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -261,13 +261,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -467,7 +467,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +477,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... 
Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +508,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +532,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..2248de9c0d8 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. 
ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -302,24 +312,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. 
ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. 
ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) + da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. 
Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. 
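A minimal, self-contained sketch of the masking behaviour discussed above (the
array and coordinate values are made up for illustration); note the effect of
the optional ``drop`` argument:

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(16).reshape(4, 4),
        dims=["x", "y"],
        coords={"x": np.arange(4), "y": np.arange(4)},
    )

    # masked entries become NaN, the original shape is preserved
    da.where(da.x + da.y < 4)

    # additionally trim away any row or column that is masked everywhere
    da.where(da.x + da.y < 4, drop=True)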
ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. 
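As a small extension of the reindexing example above, a hedged sketch: labels
that are missing from the original index can be filled with a value other than
``NaN`` by passing ``fill_value`` (the array below is made up):

.. code-block:: python

    import xarray as xr

    da = xr.DataArray([1, 2, 3], coords=[("space", ["IA", "IL", "IN"])])

    # "CA" is not in the original index; fill it with 0 instead of NaN
    da.reindex(space=["IA", "CA"], fill_value=0)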
ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..c23aab8c5d7 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. 
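For context, a minimal sketch of what such an accessor might look like. This is
illustrative only and is not the contents of
``examples/_code/accessor_example.py``; the attribute names ``longitude`` and
``latitude`` are assumed to exist on the dataset:

.. code-block:: python

    import xarray as xr


    @xr.register_dataset_accessor("geo")
    class GeoAccessor:
        def __init__(self, xarray_obj):
            self._obj = xarray_obj

        @property
        def center(self):
            """Geographic center of the dataset as a (lon, lat) tuple."""
            return (
                float(self._obj.longitude.mean()),
                float(self._obj.latitude.mean()),
            )

        def plot(self):
            """Placeholder plotting helper for the example."""
            return self._obj.longitude.plot()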
ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 4cf39807e5a..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,15 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. 
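    # (note: the ``kwargs`` dict in the calls below is forwarded to the
    # underlying scipy interpolator, e.g. ``scipy.interpolate.interp1d``
    # for 1-D data, so any of its keyword options can be supplied here)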
- da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) # 1-dimensional extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) # multi-dimensional extrapolation - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) - da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -181,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -203,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -224,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -246,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -254,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -268,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. 
ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -291,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..738d8d2b7ab 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. 
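For instance, a minimal sketch of this pattern applied to a single
``DataArray`` (the file name ``foo.nc`` is a throwaway choice), pairing
``DataArray.to_netcdf`` with :py:func:`~xarray.open_dataarray`; the ``Dataset``
version follows below:

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(3), dims="x", name="foo")
    da.to_netcdf("foo.nc")

    # the file handle is closed as soon as the block exits
    with xr.open_dataarray("foo.nc") as reopened:
        print(reopened.values)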
ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -799,7 +809,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -831,17 +841,21 @@ xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` meth To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. 
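Tying back to the ``encoding`` dictionary shown earlier in this section, the
same keys can also be supplied when writing. A sketch, assuming the netCDF4
engine and a made-up data variable named ``foo``:

.. code-block:: python

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"foo": (("x", "y"), np.random.rand(4, 5))})

    # per-variable compression settings are passed through ``encoding``
    ds.to_netcdf(
        "compressed.nc",
        encoding={"foo": {"zlib": True, "complevel": 4}},
    )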
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +868,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +900,7 @@ To read back a zarr dataset that has been created this way, we use the .. ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +934,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,11 +982,12 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- @@ -975,7 +999,7 @@ to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing ecCodes via conda:: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. 
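Before the tidy-form example below, a hedged sketch of the opposite direction:
constructing a ``Dataset`` from a small, made-up ``DataFrame`` whose named
index becomes a dimension coordinate:

.. code-block:: python

    import pandas as pd
    import xarray as xr

    df = pd.DataFrame(
        {"value": [1.0, 2.0, 3.0]},
        index=pd.Index(["a", "b", "c"], name="letters"),
    )

    # the named index becomes the "letters" dimension coordinate
    ds = xr.Dataset.from_dataframe(df)
    ds.to_dataframe()  # and back again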
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..fb30417e2c6 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. 
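Figure geometry can be controlled the same way; a sketch reusing the ``air1d``
array from above (``figsize`` is simply forwarded to matplotlib):

.. code-block:: python

    # the figure size (in inches) is forwarded to matplotlib
    air1d.plot(figsize=(8, 3))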
ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -219,7 +220,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +241,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +254,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +262,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +285,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +351,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +366,9 @@ matplotlib is available. .. ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +384,7 @@ matplotlib is available. .. 
ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +476,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +523,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +546,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +560,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +571,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +605,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +643,8 @@ Consider this dataset .. ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +652,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. 
ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +670,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,11 +702,15 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created @@ -703,13 +718,16 @@ by faceting are accessible in the object returned by ``plot``: .. ipython:: python - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +750,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +785,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +819,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +838,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +870,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. 
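The new dimension can also be given explicit coordinate labels, and therefore a
length greater than one. A minimal sketch on a toy dataset (the names ``w``,
``"low"`` and ``"high"`` are made up):

.. code-block:: python

    import xarray as xr

    ds = xr.Dataset({"foo": ("x", [1, 2, 3])})

    # a single length-1 dimension, as above
    ds.expand_dims("w")

    # or attach explicit labels, giving the new dimension length 2
    ds.expand_dims(w=["low", "high"])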
@@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. 
ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. 
ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. 
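For instance, a hedged sketch using NumPy's peak-to-peak reduction on a small
synthetic hourly series (the data are made up):

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2000-01-01", freq="H", periods=48)
    ds = xr.Dataset({"foo": ("time", np.arange(48.0)), "time": time})

    # max minus min of ``foo`` within each 6-hour bin
    ds.resample(time="6H").reduce(np.ptp)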
ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..1eb63d24630 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,8 @@ instance, we can create the same dates and DataArray we created above using: .. ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +83,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +93,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +102,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. 
ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -168,13 +172,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b71e0baa655..42e20bbf1bd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -109,6 +110,8 @@ Documentation of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` for 1-d and n-d interpolation (:pull:`3956`). By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -1965,8 +1968,8 @@ Enhancements .. ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -2057,8 +2060,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2212,7 +2215,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] 
@@ -2222,7 +2225,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2292,9 +2295,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2815,7 +2818,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3272,10 +3275,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3414,7 +3417,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3426,7 +3429,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3449,13 +3452,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3464,13 +3465,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3479,7 +3480,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3507,9 +3508,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3524,8 +3525,8 @@ Enhancements .. ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3595,9 +3596,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3677,10 +3678,11 @@ Enhancements .. 
ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3697,7 +3699,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3707,7 +3709,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3721,11 +3723,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3735,9 +3737,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3795,9 +3797,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3870,9 +3872,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3884,13 +3886,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3903,17 +3905,20 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. 
ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3926,9 +3931,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3941,7 +3946,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3950,7 +3955,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3976,7 +3981,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -4014,42 +4019,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4057,9 +4062,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4105,8 +4110,8 @@ Breaking changes .. ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4114,14 +4119,14 @@ Breaking changes .. 
ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4144,8 +4149,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4156,9 +4161,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4168,8 +4174,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4192,8 +4198,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4212,15 +4218,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4288,7 +4294,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4304,8 +4311,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. However, setting variables or attributes in this fashion is not yet @@ -4315,7 +4322,7 @@ Enhancements .. 
ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/core/common.py b/xarray/core/common.py index 1e7069ec51f..e343f342040 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ffa05ca64f0..5ced7e251c4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3495,17 +3495,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3592,8 +3593,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3604,13 +3606,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3686,8 +3690,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3698,13 +3703,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... 
) >>> array.max(dim="x") array([2., 2., 1.]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 53aa00f22ce..dd7871eaf3a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1055,9 +1055,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... ) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -6061,8 +6059,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6156,17 +6154,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6174,7 +6175,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6182,7 +6183,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6251,17 +6252,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... 
) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6269,7 +6273,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6277,7 +6281,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: From 3820fb77256682d909c1e41d962e29bec0edd62d Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 29 Apr 2020 18:12:23 +0200 Subject: [PATCH 009/342] Pint support for DataArray (#3643) * remove xfail marks from median and cumprod * remove all xfails not related to indexes or external packages * switch away from using assert_equal_with_units * use assert_allclose in a few cases instead * don't use a kwarg for searchsorted normally, this should work, but the documentation mismatches the implementation of searchsorted and names the keys as `keys` instead of `v` * move the tests for item into their own test function * move the searchsorted tests into their own test function * remove a wrapping pytest.param * treat objects implementing __array_function__ the same as ndarray * mark numpy.median as xfailing * remove the xfail marks for the all and any tests * use assert_units_equal to check the resulting units * don't attempt to use interpolate_na with int dtype arrays * update the xfail reason for DataArray.interpolate_na * xfail the compatible units bivariate_ufunc test and don't use 0 * combine and expand the reindex and interp tests * combine and expand the reindex_like and interp_like tests * xfail the quantile tests if pint is not recent enough * xfail the rolling tests * don't xfail combine_first it currently does not test indexing, so probably will need a new test for that. * use numpy's assert_allclose * don't add dimension coordinates if they're not necessary * add the PR to the list of related PRs * move the whats-new.rst entry to 0.16.0 * check for __array_ufunc__ to decide if the type is supported * xfail the bivariate ufunc tests * remove the check for __array_ufunc__ * skip the DataArray.identical tests * use pytest.param --- doc/whats-new.rst | 3 + xarray/tests/test_units.py | 520 ++++++++++++++++++++----------------- 2 files changed, 289 insertions(+), 234 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 42e20bbf1bd..051a41a57e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- More support for unit aware arrays with pint (:pull:`3643`) + By `Justus Magin `_. + - Allow plotting of boolean arrays. 
(:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 2826dc2479c..5dd4a42cff0 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1660,7 +1660,7 @@ def test_missing_value_fillna(self, unit, error): method("equals"), pytest.param( method("identical"), - marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, @@ -1885,7 +1885,10 @@ def test_squeeze(self, dtype): method("coarsen", windows={"y": 2}, func=np.mean), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), pytest.param( method("rank", dim="x"), @@ -2161,8 +2164,8 @@ class TestDataArray: "with_dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "with_coords", + "without_coords", ), ) def test_init(self, variant, dtype): @@ -2224,21 +2227,17 @@ def test_repr(self, func, variant, dtype): @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail(reason="not implemented by xarray"), + marks=pytest.mark.xfail( + reason="median does not work with dataarrays yet" + ), ), function("min"), pytest.param( @@ -2249,18 +2248,9 @@ def test_repr(self, func, variant, dtype): function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -2269,18 +2259,13 @@ def test_repr(self, func, variant, dtype): method("min"), pytest.param( method("prod"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), method("sum"), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), + method("cumprod"), ), ids=repr, ) @@ -2296,7 +2281,8 @@ def test_aggregation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", @@ -2314,7 +2300,8 @@ def test_unary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", 
@@ -2333,7 +2320,8 @@ def test_binary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", @@ -2383,7 +2371,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype): strip_units(convert_units(to_compare_with, expected_units)), ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2411,9 +2400,10 @@ def test_univariate_ufunc(self, units, error, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( "unit,error", ( @@ -2422,7 +2412,12 @@ def test_univariate_ufunc(self, units, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="pint converts to the wrong units"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2433,7 +2428,7 @@ def test_bivariate_ufunc(self, unit, error, dtype): if error is not None: with pytest.raises(error): - np.maximum(data_array, 0 * unit) + np.maximum(data_array, 1 * unit) return @@ -2441,16 +2436,18 @@ def test_bivariate_ufunc(self, unit, error, dtype): expected = attach_units( np.maximum( strip_units(data_array), - strip_units(convert_units(0 * unit, expected_units)), + strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) - actual = np.maximum(data_array, 0 * unit) - assert_equal_with_units(expected, actual) + actual = np.maximum(data_array, 1 * unit) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - actual = np.maximum(0 * unit, data_array) - assert_equal_with_units(expected, actual) + actual = np.maximum(1 * unit, data_array) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2466,7 +2463,8 @@ def test_numpy_properties(self, property, dtype): ) actual = getattr(data_array, property) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2481,16 +2479,86 @@ def test_numpy_methods(self, func, dtype): expected = attach_units(strip_units(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) + + def test_item(self, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + func = method("item", 2) + + expected = func(strip_units(data_array)) * unit_registry.m + actual = func(data_array) + + np.testing.assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + 
pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("searchsorted", 5), + pytest.param( + function("searchsorted", 5), + marks=pytest.mark.xfail( + reason="xarray does not implement __array_function__" + ), + ), + ), + ids=repr, + ) + def test_searchsorted(self, func, unit, error, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + scalar_types = (int, float) + args = list(value * unit for value in func.args) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, *args, **kwargs) + + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, + ) + actual = func(data_array, *args, **kwargs) + + assert_units_equal(expected, actual) + np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( - method("searchsorted", v=5), + function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( - reason="searchsorted somehow requires a undocumented `keys` argument" + reason="xarray does not implement __array_function__" ), ), ), @@ -2513,28 +2581,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) + args = list(value * unit for value in func.args) kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): - func(data_array, **kwargs) + func(data_array, *args, **kwargs) return units = extract_units(data_array) - expected_units = extract_units(func(array, **kwargs)) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), expected_units + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, ) - actual = func(data_array, **kwargs) + actual = func(data_array, *args, **kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2551,15 +2623,13 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2576,7 +2646,8 @@ def test_missing_value_filling(self, func, dtype): ) actual = func(data_array, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2586,12 +2657,7 @@ def test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="fillna converts to value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2629,7 +2695,8 @@ def test_fillna(self, fill_value, unit, error, dtype): ) actual = func(data_array, value=value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2643,18 +2710,13 @@ def test_dropna(self, dtype): expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( - pytest.param( - 1, - id="no_unit", - marks=pytest.mark.xfail( - reason="pint's isin implementation does not work well with mixed args" - ), - ), + pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), @@ -2677,22 +2739,11 @@ def test_isin(self, unit, dtype): ) & array.check(unit) actual = data_array.isin(values) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - "replacing_scalar", - "replacing_array", - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -2742,22 +2793,24 @@ def test_where(self, variant, unit, error, dtype): ) actual = data_array.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") - def test_interpolate_na(self, dtype): + @pytest.mark.xfail(reason="uses numpy.vectorize") + def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) 
+ xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2767,18 +2820,8 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), - pytest.param( - unit_registry.m, - None, - id="identical_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), + pytest.param(unit_registry.m, None, id="identical_unit",), ), ) def test_combine_first(self, unit, error, dtype): @@ -2807,7 +2850,8 @@ def test_combine_first(self, unit, error, dtype): ) actual = data_array.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2829,7 +2873,17 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="the behavior of identical is undecided"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 @@ -2903,7 +2957,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2950,7 +3005,6 @@ def test_broadcast_equals(self, unit, dtype): method("reset_coords", names="x2"), method("copy"), method("astype", np.float32), - method("item", 1), ), ids=repr, ) @@ -2978,7 +3032,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr @@ -3004,7 +3059,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ) actual = func(data_array, **kwargs) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3024,7 +3081,8 @@ def test_isel(self, indices, dtype): ) actual = data_array.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3067,7 +3125,9 @@ def test_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3110,7 +3170,9 @@ def test_loc(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + 
xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3153,7 +3215,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "shape", @@ -3181,7 +3245,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(), extract_units(data_array) ) actual = data_array.squeeze() - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) @@ -3190,7 +3256,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) actual = data_array.squeeze(dim=name) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3212,49 +3280,42 @@ def test_head_tail_thin(self, func, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + y = np.arange(10) * coord_unit - if error is not None: - with pytest.raises(error): - data_array.interp(x=new_coords) - - return + x = np.arange(10) + new_x = np.arange(10) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) - expected = attach_units( - strip_units(data_array).interp( - x=strip_units(convert_units(new_coords, {None: unit_registry.m})) - ), - units, - ) - actual = data_array.interp(x=new_coords) + expected = attach_units(func(strip_units(data_array), x=new_x), units) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3267,79 +3328,66 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, 
id="identical_unit"), ), ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } - - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + @pytest.mark.parametrize( + "func", (method("interp"), method("reindex")), ids=repr, + ) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(10) + 0.5) * unit + data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): - data_array.interp_like(other) + func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).interp_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) - actual = data_array.interp_like(other) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex_like(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - func = method("reindex") - - if error is not None: - with pytest.raises(error): - func(data_array, x=new_coords) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + coord = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(-2, 2) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x") + other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") - expected = attach_units( - func( - strip_units(data_array), - x=strip_units(convert_units(new_coords, {None: unit_registry.m})), - ), - {None: unit_registry.degK}, - ) - actual = func(data_array, x=new_coords) + units = extract_units(data_array) + expected = attach_units(func(strip_units(data_array), other), units) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") 
@pytest.mark.parametrize( @@ -3354,38 +3402,35 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + @pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr, + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(-2, 2) + 0.5) * unit - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + data_array = xr.DataArray(array, coords={"x": x}, dims="x") + other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): - data_array.reindex_like(other) + func(data_array, other) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).reindex_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) - actual = data_array.reindex_like(other) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3407,7 +3452,8 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3430,7 +3476,8 @@ def test_to_unstacked_dataset(self, dtype): ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3438,9 +3485,7 @@ def test_to_unstacked_dataset(self, dtype): method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -3466,7 +3511,8 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3476,16 +3522,13 @@ def test_stacking_reordering(self, func, dtype): method("integrate", dim="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), - ), - method("reduce", func=np.sum, dim="x"), - pytest.param( - lambda x: x.dot(x), - id="method_dot", marks=pytest.mark.xfail( - reason="pint does not implement the dot method" + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not 
implemented yet", ), ), + method("reduce", func=np.sum, dim="x"), + pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) @@ -3512,7 +3555,8 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3522,7 +3566,9 @@ def test_computation(self, func, dtype): method("coarsen", y=2), pytest.param( method("rolling", y=3), - marks=pytest.mark.xfail(reason="rolling strips units"), + marks=pytest.mark.xfail( + reason="numpy.lib.stride_tricks.as_strided converts to ndarray" + ), ), pytest.param( method("rolling_exp", y=3), @@ -3545,7 +3591,8 @@ def test_computation_objects(self, func, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3559,7 +3606,8 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3569,7 +3617,10 @@ def test_resample(self, dtype): method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), ), ids=repr, @@ -3598,7 +3649,8 @@ def test_grouped_operations(self, func, dtype): ) actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) class TestDataset: From 6ce07249ca7eabc181b2b88a5723e66fc06036d4 Mon Sep 17 00:00:00 2001 From: Maik Riechert Date: Sun, 3 May 2020 14:34:26 +0100 Subject: [PATCH 010/342] fix to_netcdf docstring typo (#4021) --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dd7871eaf3a..01dda828d8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1535,7 +1535,7 @@ def to_netcdf( ``dask.delayed.Delayed`` object that can be computed later. invalid_netcdf: boolean Only valid along with engine='h5netcdf'. If True, allow writing - hdf5 files which are valid netcdf as described in + hdf5 files which are invalid netcdf as described in https://github.com/shoyer/h5netcdf. Default: False. 
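# An illustrative aside, not part of this patch: ``invalid_netcdf=True`` is the
# escape hatch for data that plain netCDF cannot represent (for example complex
# values), and it only applies together with ``engine="h5netcdf"``. A minimal,
# hypothetical sketch (file name invented):
#
#     import numpy as np
#     import xarray as xr
#
#     ds = xr.Dataset({"z": ("x", np.arange(3) + 1j)})
#     ds.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True)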
""" if encoding is None: From 1b3c76863041d3265e5d011e68482944c447d78f Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Tue, 5 May 2020 07:27:30 +0530 Subject: [PATCH 011/342] chore: Remove unnecessary comprehension (#4026) * chore: Remove unnecessary comprehension * Update whats-new.rst --- doc/whats-new.rst | 4 ++++ xarray/core/groupby.py | 2 +- xarray/core/pdcompat.py | 2 +- xarray/core/variable.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 051a41a57e5..1993e543322 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -129,6 +129,10 @@ Internal Changes - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. +- Remove unnecessary comprehensions becuase the built-in functions like + ``all``, ``any``, ``enumerate``, ``sum``, ``tuple`` etc. can work directly with a + generator expression. (:pull:`4026`) + By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 148e16863d1..85dd735c2fe 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,7 +29,7 @@ def check_reduce_dims(reduce_dims, dimensions): if reduce_dims is not ...: if is_scalar(reduce_dims): reduce_dims = [reduce_dims] - if any([dim not in dimensions for dim in reduce_dims]): + if any(dim not in dimensions for dim in reduce_dims): raise ValueError( "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." % (reduce_dims, dimensions) diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index f2e4518e0dc..f2e22329fc8 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -55,4 +55,4 @@ def count_not_none(*args) -> int: Copied from pandas.core.common.count_not_none (not part of the public API) """ - return sum([arg is not None for arg in args]) + return sum(arg is not None for arg in args) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 68e823ca426..e19132b1b06 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2412,7 +2412,7 @@ def assert_unique_multiindex_level_names(variables): duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: - conflict_str = "\n".join([", ".join(v) for v in duplicate_names]) + conflict_str = "\n".join(", ".join(v) for v in duplicate_names) raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): From 1c5adc9fba6dcde73f31282719d3d8614e54f59b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 5 May 2020 12:28:01 -0700 Subject: [PATCH 012/342] Support overriding existing variables in to_zarr() without appending (#4029) * Support overriding existing variables in to_zarr() without appending This should be useful for cases where users want to update values in existing Zarr datasets. 
* Update docstring for to_zarr --- doc/whats-new.rst | 4 ++- xarray/backends/api.py | 35 ++++++++++++++----- xarray/backends/zarr.py | 31 ++++++++++------- xarray/core/dataset.py | 13 +++---- xarray/tests/test_backends.py | 65 +++++++++++++---------------------- 5 files changed, 79 insertions(+), 69 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1993e543322..cdec7d81bbc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,7 +49,9 @@ New Features By `Todd Jennings `_ - More support for unit aware arrays with pint (:pull:`3643`) By `Justus Magin `_. - +- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even + without ``append_dim``, as long as dimension sizes do not change. + By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c7481e22b59..184aad579a2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1279,18 +1279,35 @@ def _validate_append_dim_and_encoding( return if append_dim: if append_dim not in ds.dims: - raise ValueError(f"{append_dim} not a valid dimension in the Dataset") - for data_var in ds_to_append: - if data_var in ds: - if append_dim is None: + raise ValueError( + f"append_dim={append_dim!r} does not match any existing " + f"dataset dimensions {ds.dims}" + ) + for var_name in ds_to_append: + if var_name in ds: + if ds_to_append[var_name].dims != ds[var_name].dims: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {ds[var_name].dims} != " + f"{ds_to_append[var_name].dims}, but changing variable " + "dimensions is not supported by to_zarr()." + ) + existing_sizes = { + k: v for k, v in ds[var_name].sizes.items() if k != append_dim + } + new_sizes = { + k: v for k, v in ds_to_append[var_name].sizes.items() if k != append_dim + } + if existing_sizes != new_sizes: raise ValueError( - "variable '{}' already exists, but append_dim " - "was not set".format(data_var) + f"variable {var_name!r} already exists with different " + "dimension sizes: {existing_sizes} != {new_sizes}. " + "to_zarr() only supports changing dimension sizes when " + f"explicitly appending, but append_dim={append_dim!r}." 
) - if data_var in encoding.keys(): + if var_name in encoding.keys(): raise ValueError( - "variable '{}' already exists, but encoding was" - "provided".format(data_var) + f"variable {var_name!r} already exists, but encoding was provided" ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 973c167911e..de6b627447e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -445,18 +445,23 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No fill_value = attrs.pop("_FillValue", None) if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - if name in self.ds: + + if self.append_dim is not None and self.append_dim in dims: + # resize existing variable zarr_array = self.ds[name] - if self.append_dim in dims: - # this is the DataArray that has append_dim as a - # dimension - append_axis = dims.index(self.append_dim) - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - new_region = [slice(None)] * len(new_shape) - new_region[append_axis] = slice(zarr_array.shape[append_axis], None) - zarr_array.resize(new_shape) - writer.add(v.data, zarr_array, region=tuple(new_region)) + append_axis = dims.index(self.append_dim) + + new_region = [slice(None)] * len(dims) + new_region[append_axis] = slice(zarr_array.shape[append_axis], None) + region = tuple(new_region) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + elif name in self.ds: + # override existing variable + zarr_array = self.ds[name] + region = None else: # new variable encoding = extract_zarr_variable_encoding( @@ -474,7 +479,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) zarr_array.attrs.put(encoded_attrs) - writer.add(v.data, zarr_array) + region = None + + writer.add(v.data, zarr_array, region=region) def close(self): if self._consolidate_on_close: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 01dda828d8a..2a8b7bdbb9a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1579,7 +1579,7 @@ def to_zarr( mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); - 'a' means append (create if does not exist). + 'a' means override existing variables (create if does not exist). If ``append_dim`` is set, ``mode`` can be omitted as it is internally set to ``'a'``. Otherwise, ``mode`` will default to `w-` if not set. @@ -1598,7 +1598,8 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If set, the dimension on which the data will be appended. + If set, the dimension along which the data will be appended. All + other dimensions on overriden variables must remain the same size. References ---------- @@ -1766,7 +1767,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise" ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -5933,7 +5934,7 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." 
) fac = residuals / (x.shape[0] - order) - covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j")) * fac variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) @@ -6199,7 +6200,7 @@ def idxmin( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) def idxmax( @@ -6297,7 +6298,7 @@ def idxmax( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 916c29ba7bd..90deea51d2a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1526,12 +1526,6 @@ def roundtrip( with self.open(store_target, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False - ): - pytest.skip("zarr backend does not support appending") - def test_roundtrip_consolidated(self): pytest.importorskip("zarr", minversion="2.2.1.dev2") expected = create_test_data() @@ -1826,7 +1820,7 @@ def test_encoding_kwarg_fixed_width_string(self): # not relevant for zarr, since we don't use EncodedStringCoder pass - # TODO: someone who understand caching figure out whether chaching + # TODO: someone who understand caching figure out whether caching # makes sense for Zarr backend @pytest.mark.xfail(reason="Zarr caching not implemented") def test_dataset_caching(self): @@ -1834,55 +1828,44 @@ def test_dataset_caching(self): @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_append_write(self): - ds, ds_to_append, _ = create_append_test_data() - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - ds_to_append.to_zarr(store_target, append_dim="time") - original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) - - @pytest.mark.xfail(reason="Zarr stores can not be appended to") - def test_append_overwrite_values(self): - super().test_append_overwrite_values() + super().test_append_write() def test_append_with_invalid_dim_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not valid - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="does not match any existing dataset dimensions" + ): ds_to_append.to_zarr(store_target, append_dim="notvalid") - def test_append_with_append_dim_not_set_raises(self): + def test_append_with_no_dims_raises(self): + with self.create_zarr_target() as store_target: + Dataset({"foo": ("x", [1])}).to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension names"): + Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + def test_append_with_append_dim_not_set_raises(self): ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not set - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension sizes"): ds_to_append.to_zarr(store_target, mode="a") def test_append_with_mode_not_a_raises(self): - ds, ds_to_append, _ = 
create_append_test_data() - - # check failure when append_dim is set and mode != 'a' - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="append_dim was set along with mode='w'" + ): ds_to_append.to_zarr(store_target, mode="w", append_dim="time") def test_append_with_existing_encoding_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when providing encoding to existing variable - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", From 59b470f5d1464366dc55b082618ea87da8fbc9af Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 5 May 2020 14:49:25 -0700 Subject: [PATCH 013/342] Allow warning with cartopy in docs plotting build (#4032) It looks like this is triggered by the new cartopy version now being installed on RTD (version 0.17.0 -> 0.18.0). Long term we should fix this, but for now it's better just to disable the warning. Here's the message from RTD: ``` Exception occurred: File "/home/docs/checkouts/readthedocs.org/user_builds/xray/conda/latest/lib/python3.8/site-packages/IPython/sphinxext/ipython_directive.py", line 586, in process_input raise RuntimeError('Non Expected warning in `{}` line {}'.format(filename, lineno)) RuntimeError: Non Expected warning in `/home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/doc/plotting.rst` line 732 The full traceback has been saved in /tmp/sphinx-err-qav6jjmm.log, if you want to report the issue to the developers. Please also report this if it was a user error, so that a better error message can be provided next time. A bug report can be filed in the tracker at . Thanks! >>>------------------------------------------------------------------------- Warning in /home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/doc/plotting.rst at block ending on line 732 Specify :okwarning: as an option in the ipython:: block to suppress this message ---------------------------------------------------------------------------- /home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/xarray/plot/facetgrid.py:373: UserWarning: Tight layout not applied. The left and right margins cannot be made large enough to accommodate all axes decorations. self.fig.tight_layout() <<<------------------------------------------------------------------------- ``` https://readthedocs.org/projects/xray/builds/10969146/ --- doc/plotting.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/plotting.rst b/doc/plotting.rst index fb30417e2c6..40c0ca1a496 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -717,6 +717,7 @@ function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. 
ipython:: python + :okwarning: p = air.isel(time=[0, 4]).plot( transform=ccrs.PlateCarree(), From 9ec3f7b44d50ffa2298a9796847e69953ae96cbd Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 5 May 2020 18:50:20 -0700 Subject: [PATCH 014/342] Remove broken test for Panel with to_pandas() (#4028) * Remove broken test for Panel with to_pandas() We don't support creating a Panel with to_pandas() with *any* version of pandas at present, so this test was previous broken if pandas < 0.25 was isntalled. * remove unused import * Fixup LooseVersion import --- xarray/tests/test_dataset.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a1cb7361e77..2a89920766c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -32,7 +32,6 @@ from . import ( InaccessibleArray, - LooseVersion, UnexpectedDataAccess, assert_allclose, assert_array_equal, @@ -496,16 +495,11 @@ def test_constructor_pandas_single(self): DataArray(np.random.rand(4, 3), dims=["a", "b"]), # df ] - if LooseVersion(pd.__version__) < "0.25.0": - das.append(DataArray(np.random.rand(4, 3, 2), dims=["a", "b", "c"])) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - for a in das: - pandas_obj = a.to_pandas() - ds_based_on_pandas = Dataset(pandas_obj) - for dim in ds_based_on_pandas.data_vars: - assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim]) + for a in das: + pandas_obj = a.to_pandas() + ds_based_on_pandas = Dataset(pandas_obj) + for dim in ds_based_on_pandas.data_vars: + assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim]) def test_constructor_compat(self): data = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])} From fe7962a7016dc9aa25e54cc857efa4aa52baed8a Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 6 May 2020 12:39:34 -0400 Subject: [PATCH 015/342] Transpose coords by default (#3824) * transpose coords by default * whatsnew * Update doc/whats-new.rst Co-authored-by: crusaderky * Update whats-new.rst Co-authored-by: crusaderky --- doc/whats-new.rst | 6 ++++++ xarray/core/dataarray.py | 14 ++------------ xarray/core/groupby.py | 19 ++----------------- xarray/tests/test_dataarray.py | 6 ------ 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cdec7d81bbc..1204155f062 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,12 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ + +- ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` + to revert to previous behavior. +- :meth:`DataArray.transpose` will now transpose coordinates by default. + Pass ``transpose_coords=False`` to revert to previous behaviour. 
+ By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5ced7e251c4..fc9e3410247 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,6 +1,5 @@ import datetime import functools -import warnings from numbers import Number from typing import ( TYPE_CHECKING, @@ -1915,7 +1914,7 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArray": + def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": """Return a new DataArray object with transposed dimensions. Parameters @@ -1923,7 +1922,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra *dims : hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. - transpose_coords : boolean, optional + transpose_coords : boolean, default True If True, also transpose the coordinates of this DataArray. Returns @@ -1952,15 +1951,6 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra coords[name] = coord.variable.transpose(*coord_dims) return self._replace(variable, coords) else: - if transpose_coords is None and any(self[c].ndim > 1 for c in self.coords): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, these coordinates " - "will be transposed as well unless you specify " - "transpose_coords=False.", - FutureWarning, - stacklevel=2, - ) return self._replace(variable) @property diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 85dd735c2fe..299cb8ec4fa 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -272,7 +272,7 @@ def __init__( squeeze=False, grouper=None, bins=None, - restore_coord_dims=None, + restore_coord_dims=True, cut_kwargs=None, ): """Create a GroupBy object @@ -292,7 +292,7 @@ def __init__( bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. - restore_coord_dims : bool, optional + restore_coord_dims : bool, default True If True, also restore the dimension order of multi-dimensional coordinates. cut_kwargs : dict, optional @@ -389,21 +389,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - if ( - isinstance(obj, DataArray) - and restore_coord_dims is None - and any(obj[c].ndim > 1 for c in obj.coords) - ): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. 
In the future, the dimension order " - "of these coordinates will be restored as well " - "unless you specify restore_coord_dims=False.", - FutureWarning, - stacklevel=2, - ) - restore_coord_dims = False - # specification for the groupby operation self._obj = obj self._group = group diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c3e5aafabfe..6984d5361d2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2161,9 +2161,6 @@ def test_transpose(self): with pytest.raises(ValueError): da.transpose("x", "y") - with pytest.warns(FutureWarning): - da.transpose() - def test_squeeze(self): assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) @@ -2753,9 +2750,6 @@ def test_groupby_restore_coord_dims(self): )["c"] assert result.dims == expected_dims - with pytest.warns(FutureWarning): - array.groupby("x").map(lambda x: x.squeeze()) - def test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") by = DataArray(["a"] * 2 + ["b"] * 3, dims="x", name="ab") From 0b6e22f6af71cc19d2e0575ecdf5c66109c314dd Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 6 May 2020 16:41:53 +0000 Subject: [PATCH 016/342] Add template xarray object kwarg to map_blocks (#3816) * Allow providing template dataset to map_blocks. * Update dimension shape check. This accounts for dimension sizes being changed by the applied function. * Allow user function to add new unindexed dimension. * Add docstring for template. * renaming * Raise nice error if adding a new chunked dimension, * Raise nice error message when expected dimension is missing on returned object * Revert "Allow user function to add new unindexed dimension." This reverts commit 045ae2b1bf939515e0a38c960d0cdc7974bcfa37. * Add test + fix output_chunks for dataarray template * typing * fix test * Add nice error messages when result doesn't match template. * blacken * Add template kwarg to DataArray.map_blocks & Dataset.map_blocks * minor error message fixes. * docstring updates. * bugfix for expected shapes when template is not specified * Add map_blocks docs. * Update doc/dask.rst Co-Authored-By: Joe Hamman * refactor out slicer for chunks * Check expected index values. * Raise nice error when template object does not have required number of chunks * doc updates. * more review comments. * Mention that attrs are taken from template. 
* Add test and explicit point out that attrs is copied from template Co-authored-by: Joe Hamman --- doc/api.rst | 5 +- doc/dask.rst | 114 +++++++++++++++++++++++- doc/whats-new.rst | 5 ++ xarray/core/dataarray.py | 30 ++++--- xarray/core/dataset.py | 30 ++++--- xarray/core/parallel.py | 180 +++++++++++++++++++++++++++++--------- xarray/tests/test_dask.py | 69 ++++++++++++++- 7 files changed, 359 insertions(+), 74 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..8ec6843d24a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -173,6 +173,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.map_blocks Dataset.polyfit **Aggregation**: @@ -358,6 +359,8 @@ Computation DataArray.integrate DataArray.polyfit DataArray.str + DataArray.map_blocks + **Aggregation**: :py:attr:`~DataArray.all` @@ -518,7 +521,6 @@ Dataset methods Dataset.load Dataset.chunk Dataset.unify_chunks - Dataset.map_blocks Dataset.filter_by_attrs Dataset.info @@ -550,7 +552,6 @@ DataArray methods DataArray.load DataArray.chunk DataArray.unify_chunks - DataArray.map_blocks Coordinates objects =================== diff --git a/doc/dask.rst b/doc/dask.rst index 2248de9c0d8..df223982ba4 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -284,12 +284,21 @@ loaded into Dask or not: .. _dask.automatic-parallelization: -Automatic parallelization -------------------------- +Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` +----------------------------------------------------------------- Almost all of xarray's built-in operations work on Dask arrays. If you want to -use a function that isn't wrapped by xarray, one option is to extract Dask -arrays from xarray objects (``.data``) and use Dask directly. +use a function that isn't wrapped by xarray, and have it applied in parallel on +each block of your xarray object, you have three options: + +1. Extract Dask arrays from xarray objects (``.data``) and use Dask directly. +2. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. +3. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` + to apply functions that consume and return xarray objects. + + +``apply_ufunc`` +~~~~~~~~~~~~~~~ Another option is to use xarray's :py:func:`~xarray.apply_ufunc`, which can automate `embarrassingly parallel @@ -400,6 +409,103 @@ application. structure of a problem, unlike the generic speedups offered by ``dask='parallelized'``. + +``map_blocks`` +~~~~~~~~~~~~~~ + +Functions that consume and return xarray objects can be easily applied in parallel using :py:func:`map_blocks`. +Your function will receive an xarray Dataset or DataArray subset to one chunk +along each chunked dimension. + +.. ipython:: python + + ds.temperature + +This DataArray has 3 chunks each with length 10 along the time dimension. +At compute time, a function applied with :py:func:`map_blocks` will receive a DataArray corresponding to a single block of shape 10x180x180 +(time x latitude x longitude) with values loaded. The following snippet illustrates how to check the shape of the object +received by the applied function. + +.. ipython:: python + + def func(da): + print(da.sizes) + return da.time + + mapped = xr.map_blocks(func, ds.temperature) + mapped + +Notice that the :py:meth:`map_blocks` call printed +``Frozen({'time': 0, 'latitude': 0, 'longitude': 0})`` to screen. +``func`` is received 0-sized blocks! 
:py:meth:`map_blocks` needs to know what the final result +looks like in terms of dimensions, shapes etc. It does so by running the provided function on 0-shaped +inputs (*automated inference*). This works in many cases, but not all. If automatic inference does not +work for your function, provide the ``template`` kwarg (see below). + +In this case, automatic inference has worked so let's check that the result is as expected. + +.. ipython:: python + + mapped.load(scheduler="single-threaded") + mapped.identical(ds.time) + +Note that we use ``.load(scheduler="single-threaded")`` to execute the computation. +This executes the Dask graph in `serial` using a for loop, but allows for printing to screen and other +debugging techniques. We can easily see that our function is receiving blocks of shape 10x180x180 and +the returned result is identical to ``ds.time`` as expected. + + +Here is a common example where automated inference will not work. + +.. ipython:: python + :okexcept: + + def func(da): + print(da.sizes) + return da.isel(time=[1]) + + mapped = xr.map_blocks(func, ds.temperature) + +``func`` cannot be run on 0-shaped inputs because it is not possible to extract element 1 along a +dimension of size 0. In this case we need to tell :py:func:`map_blocks` what the returned result looks +like using the ``template`` kwarg. ``template`` must be an xarray Dataset or DataArray (depending on +what the function returns) with dimensions, shapes, chunk sizes, attributes, coordinate variables *and* data +variables that look exactly like the expected result. The variables should be dask-backed and hence not +incur much memory cost. + +.. note:: + + Note that when ``template`` is provided, ``attrs`` from ``template`` are copied over to the result. Any + ``attrs`` set in ``func`` will be ignored. + + +.. ipython:: python + + template = ds.temperature.isel(time=[1, 11, 21]) + mapped = xr.map_blocks(func, ds.temperature, template=template) + + +Notice that the 0-shaped sizes were not printed to screen. Since ``template`` has been provided +:py:func:`map_blocks` does not need to infer it by running ``func`` on 0-shaped inputs. + +.. ipython:: python + + mapped.identical(template) + + +:py:func:`map_blocks` also allows passing ``args`` and ``kwargs`` down to the user function ``func``. +``func`` will be executed as ``func(block_xarray, *args, **kwargs)`` so ``args`` must be a list and ``kwargs`` must be a dictionary. + +.. ipython:: python + + def func(obj, a, b=0): + return obj + a + b + + mapped = ds.map_blocks(func, args=[10], kwargs={"b": 10}) + expected = ds + 10 + 10 + mapped.identical(expected) + + Chunking and performance ------------------------ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1204155f062..b22a7217568 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,6 +67,9 @@ New Features the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases + where the result of a computation could not be inferred automatically. + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ @@ -123,6 +126,8 @@ Documentation By `Matthias Riße `_. - Apply ``black`` to all the code in the documentation (:pull:`4012`) By `Justus Magin `_. +- Narrative documentation now describes :py:meth:`map_blocks`. :ref:`dask.automatic-parallelization`. + By `Deepak Cherian `_. 
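As a minimal, hedged sketch of the ``template`` kwarg documented above (array
sizes and names are invented; the same pattern works via ``xr.map_blocks``):

```
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(30.0), dims="time").chunk({"time": 10})

def every_other(block):
    # changes the length of "time", so the result cannot be inferred automatically
    return block.isel(time=slice(None, None, 2))

template = da.isel(time=slice(None, None, 2)).chunk({"time": 5})
result = da.map_blocks(every_other, template=template)
```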
Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fc9e3410247..236938bac74 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3250,27 +3250,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this DataArray. This method is experimental - and its signature may change. + Apply a function to each block of this DataArray. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a DataArray as its first parameter. The - function will receive a subset of this DataArray, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this array - but has sizes 0, to determine properties of the returned object such as - dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a DataArray as its first + parameter. The function will receive a subset, i.e. one block, of this DataArray + (see below), corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -3278,6 +3276,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -3300,7 +3304,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2a8b7bdbb9a..3a55f3eca27 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5709,27 +5709,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this Dataset. This method is experimental and - its signature may change. + Apply a function to each block of this Dataset. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a Dataset as its first parameter. The - function will receive a subset of this Dataset, corresponding to one chunk - along each chunked dimension. 
``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this - Dataset but has sizes 0, to determine properties of the returned object such - as dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a Dataset as its first + parameter. The function will receive a subset, i.e. one block, of this Dataset + (see below), corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -5737,6 +5735,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -5759,7 +5763,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6f1668f698f..d91dfb4a275 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -31,6 +31,30 @@ T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset) +def check_result_variables( + result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str +): + + if kind == "coords": + nice_str = "coordinate" + elif kind == "data_vars": + nice_str = "data" + + # check that coords and data variables are as expected + missing = expected[kind] - set(getattr(result, kind)) + if missing: + raise ValueError( + "Result from applying user function does not contain " + f"{nice_str} variables {missing}." + ) + extra = set(getattr(result, kind)) - expected[kind] + if extra: + raise ValueError( + "Result from applying user function has unexpected " + f"{nice_str} variables {extra}." + ) + + def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError("Expected Dataset, got %s" % type(obj)) @@ -80,7 +104,8 @@ def infer_template( template = func(*meta_args, **kwargs) except Exception as e: raise Exception( - "Cannot infer object returned from running user provided function." + "Cannot infer object returned from running user provided function. " + "Please supply the 'template' kwarg to map_blocks." 
) from e if not isinstance(template, (Dataset, DataArray)): @@ -102,14 +127,24 @@ def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: return {k: v.data for k, v in x.variables.items()} +def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): + if dim in chunk_index: + which_chunk = chunk_index[dim] + return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) + return slice(None) + + def map_blocks( func: Callable[..., T_DSorDA], obj: Union[DataArray, Dataset], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union[DataArray, Dataset] = None, ) -> T_DSorDA: - """Apply a function to each chunk of a DataArray or Dataset. This function is - experimental and its signature may change. + """Apply a function to each block of a DataArray or Dataset. + + .. warning:: + This function is experimental and its signature may change. Parameters ---------- @@ -119,14 +154,10 @@ def map_blocks( corresponding to one chunk along each chunked dimension. ``func`` will be executed as ``func(obj_subset, *args, **kwargs)``. - The function will be first run on mocked-up data, that looks like 'obj' but - has sizes 0, to determine properties of the returned object such as dtype, - variable names, new dimensions and new indexes (if any). - This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + obj: DataArray, Dataset Passed to the function as its first argument, one dask chunk at a time. args: Sequence @@ -135,6 +166,15 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. + When provided, `attrs` on variables in `template` are copied over to the result. Any + `attrs` set by `func` will be ignored. + Returns ------- @@ -201,22 +241,47 @@ def map_blocks( * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ - def _wrapper(func, obj, to_array, args, kwargs): + def _wrapper(func, obj, to_array, args, kwargs, expected): + check_shapes = dict(obj.dims) + check_shapes.update(expected["shapes"]) + if to_array: obj = dataset_to_dataarray(obj) result = func(obj, *args, **kwargs) + # check all dims are present + missing_dimensions = set(expected["shapes"]) - set(result.sizes) + if missing_dimensions: + raise ValueError( + f"Dimensions {missing_dimensions} missing on returned object." + ) + + # check that index lengths and values are as expected for name, index in result.indexes.items(): - if name in obj.indexes: - if len(index) != len(obj.indexes[name]): + if name in check_shapes: + if len(index) != check_shapes[name]: raise ValueError( - "Length of the %r dimension has changed. This is not allowed." - % name + f"Received dimension {name!r} of length {len(index)}. Expected length {check_shapes[name]}." 
) + if name in expected["indexes"]: + expected_index = expected["indexes"][name] + if not index.equals(expected_index): + raise ValueError( + f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." + ) + + # check that all expected variables were returned + check_result_variables(result, expected, "coords") + if isinstance(result, Dataset): + check_result_variables(result, expected, "data_vars") return make_dict(result) + if template is not None and not isinstance(template, (DataArray, Dataset)): + raise TypeError( + f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." + ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: @@ -248,8 +313,38 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array = False input_chunks = dataset.chunks + dataset_indexes = set(dataset.indexes) + if template is None: + # infer template by providing zero-shaped arrays + template = infer_template(func, obj, *args, **kwargs) + template_indexes = set(template.indexes) + preserved_indexes = template_indexes & dataset_indexes + new_indexes = template_indexes - dataset_indexes + indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} + indexes.update({k: template.indexes[k] for k in new_indexes}) + output_chunks = { + dim: input_chunks[dim] for dim in template.dims if dim in input_chunks + } + + else: + # template xarray object has been provided with proper sizes and chunk shapes + template_indexes = set(template.indexes) + indexes = {dim: dataset.indexes[dim] for dim in dataset_indexes} + indexes.update({k: template.indexes[k] for k in template_indexes}) + if isinstance(template, DataArray): + output_chunks = dict(zip(template.dims, template.chunks)) # type: ignore + else: + output_chunks = template.chunks # type: ignore + + for dim in output_chunks: + if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): + raise ValueError( + "map_blocks requires that one block of the input maps to one block of output. " + f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " + f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " + "fix the provided template." + ) - template: Union[DataArray, Dataset] = infer_template(func, obj, *args, **kwargs) if isinstance(template, DataArray): result_is_array = True template_name = template.name @@ -261,13 +356,6 @@ def _wrapper(func, obj, to_array, args, kwargs): f"func output must be DataArray or Dataset; got {type(template)}" ) - template_indexes = set(template.indexes) - dataset_indexes = set(dataset.indexes) - preserved_indexes = template_indexes & dataset_indexes - new_indexes = template_indexes - dataset_indexes - indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) - # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. 
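    # An illustrative aside (not part of this patch): the check above encodes the
    # contract that one input block maps to exactly one output block, so chunk
    # structure is preserved whenever the applied function keeps sizes unchanged.
    # A hypothetical round-trip:
    #
    #     import numpy as np
    #     import xarray as xr
    #
    #     da = xr.DataArray(np.arange(12.0), dims="x").chunk({"x": 4})
    #     mapped = xr.map_blocks(lambda block: block + 1, da)
    #     assert mapped.chunks == da.chunks  # ((4, 4, 4),)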
@@ -281,13 +369,16 @@ def _wrapper(func, obj, to_array, args, kwargs): # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds - chunk_index_bounds = { + input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } + output_chunk_bounds = { + dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() + } # iterate over all possible chunk combinations for v in itertools.product(*ichunk.values()): - chunk_index_dict = dict(zip(dataset.dims, v)) + chunk_index = dict(zip(dataset.dims, v)) # this will become [[name1, variable1], # [name2, variable2], @@ -302,9 +393,9 @@ def _wrapper(func, obj, to_array, args, kwargs): # recursively index into dask_keys nested list to get chunk chunk = variable.__dask_keys__() for dim in variable.dims: - chunk = chunk[chunk_index_dict[dim]] + chunk = chunk[chunk_index[dim]] - chunk_variable_task = (f"{gname}-{chunk[0]}",) + v + chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + v graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], @@ -312,15 +403,10 @@ def _wrapper(func, obj, to_array, args, kwargs): else: # non-dask array with possibly chunked dimensions # index into variable appropriately - subsetter = {} - for dim in variable.dims: - if dim in chunk_index_dict: - which_chunk = chunk_index_dict[dim] - subsetter[dim] = slice( - chunk_index_bounds[dim][which_chunk], - chunk_index_bounds[dim][which_chunk + 1], - ) - + subsetter = { + dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) + for dim in variable.dims + } subset = variable.isel(subsetter) chunk_variable_task = ( "{}-{}".format(gname, dask.base.tokenize(subset)), @@ -336,6 +422,20 @@ def _wrapper(func, obj, to_array, args, kwargs): else: data_vars.append([name, chunk_variable_task]) + # expected["shapes", "coords", "data_vars", "indexes"] are used to raise nice error messages in _wrapper + expected = {} + # input chunk 0 along a dimension maps to output chunk 0 along the same dimension + # even if length of dimension is changed by the applied function + expected["shapes"] = { + k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks + } + expected["data_vars"] = set(template.data_vars.keys()) # type: ignore + expected["coords"] = set(template.coords.keys()) # type: ignore + expected["indexes"] = { + dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] + for dim in indexes + } + from_wrapper = (gname,) + v graph[from_wrapper] = ( _wrapper, @@ -344,6 +444,7 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array, args, kwargs, + expected, ) # mapping from variable name to dask graph key @@ -356,10 +457,11 @@ def _wrapper(func, obj, to_array, args, kwargs): key: Tuple[Any, ...] 
= (gname_l,) for dim in variable.dims: - if dim in chunk_index_dict: - key += (chunk_index_dict[dim],) + if dim in chunk_index: + key += (chunk_index[dim],) else: # unchunked dimensions in the input have one chunk in the result + # output can have new dimensions with exactly one chunk key += (0,) # We're adding multiple new layers to the graph: @@ -382,8 +484,8 @@ def _wrapper(func, obj, to_array, args, kwargs): dims = template[name].dims var_chunks = [] for dim in dims: - if dim in input_chunks: - var_chunks.append(input_chunks[dim]) + if dim in output_chunks: + var_chunks.append(output_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) elif dim in template.dims: diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 538dbbfb58b..75beb3757ca 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1039,7 +1039,7 @@ def test_map_blocks_error(map_da, map_ds): def bad_func(darray): return (darray * darray.x + 5 * darray.y)[:1, :1] - with raises_regex(ValueError, "Length of the.* has changed."): + with raises_regex(ValueError, "Received dimension 'x' of length 1"): xr.map_blocks(bad_func, map_da).compute() def returns_numpy(darray): @@ -1109,6 +1109,11 @@ def add_attrs(obj): assert_identical(actual, expected) + # when template is specified, attrs are copied from template, not set by function + with raise_if_dask_computes(): + actual = xr.map_blocks(add_attrs, obj, template=obj) + assert_identical(actual, obj) + def test_map_blocks_change_name(map_da): def change_name(obj): @@ -1150,7 +1155,7 @@ def test_map_blocks_to_array(map_ds): lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], + lambda x: x.x, ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1170,7 +1175,7 @@ def test_map_blocks_da_transformations(func, map_da): lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), - # TODO: [lambda x: x.isel(x=1)], + lambda x: x.x, ], ) def test_map_blocks_ds_transformations(func, map_ds): @@ -1180,6 +1185,64 @@ def test_map_blocks_ds_transformations(func, map_ds): assert_identical(actual, func(map_ds)) +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_da_ds_with_template(obj): + func = lambda x: x.isel(x=[1]) + template = obj.isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, obj, template=template) + assert_identical(actual, template) + + with raise_if_dask_computes(): + actual = obj.map_blocks(func, template=template) + assert_identical(actual, template) + + +def test_map_blocks_template_convert_object(): + da = make_da() + func = lambda x: x.to_dataset().isel(x=[1]) + template = da.to_dataset().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, da, template=template) + assert_identical(actual, template) + + ds = da.to_dataset() + func = lambda x: x.to_array().isel(x=[1]) + template = ds.to_array().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, ds, template=template) + assert_identical(actual, template) + + +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_errors_bad_template(obj): + with raises_regex(ValueError, "unexpected coordinate variables"): + xr.map_blocks(lambda x: x.assign_coords(a=10), obj, template=obj).compute() + with raises_regex(ValueError, "does not contain coordinate variables"): + 
xr.map_blocks(lambda x: x.drop_vars("cxy"), obj, template=obj).compute() + with raises_regex(ValueError, "Dimensions {'x'} missing"): + xr.map_blocks(lambda x: x.isel(x=1), obj, template=obj).compute() + with raises_regex(ValueError, "Received dimension 'x' of length 1"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=obj).compute() + with raises_regex(TypeError, "must be a DataArray"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=(obj,)).compute() + with raises_regex(ValueError, "map_blocks requires that one block"): + xr.map_blocks( + lambda x: x.isel(x=[1]).assign_coords(x=10), obj, template=obj.isel(x=[1]) + ).compute() + with raises_regex(ValueError, "Expected index 'x' to be"): + xr.map_blocks( + lambda a: a.isel(x=[1]).assign_coords(x=[120]), # assign bad index values + obj, + template=obj.isel(x=[1, 5, 9]), + ).compute() + + +def test_map_blocks_errors_bad_template_2(map_ds): + with raises_regex(ValueError, "unexpected data variables {'xyz'}"): + xr.map_blocks(lambda x: x.assign(xyz=1), map_ds, template=map_ds).compute() + + @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_object_method(obj): def func(obj): From 0e43ba9c7080fc5f1076b0bccec09aeac1a3499b Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Thu, 7 May 2020 04:25:39 +0530 Subject: [PATCH 017/342] Use literal syntax instead of function calls to create the data structure (#4038) * Use literal syntax instead of function calls to create the data structure * Update whats-new.rst * Update whats-new.rst --- doc/whats-new.rst | 4 +--- xarray/core/merge.py | 2 +- xarray/util/print_versions.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b22a7217568..3be7c2f45e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -142,9 +142,7 @@ Internal Changes - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. -- Remove unnecessary comprehensions becuase the built-in functions like - ``all``, ``any``, ``enumerate``, ``sum``, ``tuple`` etc. can work directly with a - generator expression. (:pull:`4026`) +- Various internal code clean-ups (:pull:`4026`, :pull:`4038`). By `Prajjwal Nijhara `_. .. 
_whats-new.0.15.1: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fea94246471..35b77d700a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -841,7 +841,7 @@ def merge( from .dataarray import DataArray from .dataset import Dataset - dict_like_objects = list() + dict_like_objects = [] for obj in objects: if not isinstance(obj, (DataArray, Dataset, dict)): raise TypeError( diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 32051bb6843..96983c83aab 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -129,7 +129,7 @@ def show_versions(file=sys.stdout): ("sphinx", lambda mod: mod.__version__), ] - deps_blob = list() + deps_blob = [] for (modname, ver_f) in deps: try: if modname in sys.modules: From 69548df9826cde9df6cbdae9c033c9fb1e62d493 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Fri, 8 May 2020 05:36:31 +0900 Subject: [PATCH 018/342] support darkmode (#4036) * support darkmode but in vscode only * remove unused space * support colab (maybe) and whatsnew --- doc/whats-new.rst | 4 +++- xarray/static/css/style.css | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3be7c2f45e6..a1d52b28ed5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,7 +73,9 @@ New Features Bug fixes ~~~~~~~~~ -- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) +- Support dark mode in VS code (:issue:`4024`) + By `Keisuke Fujii `_. +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 7e382de3b5b..acfe85d5ac7 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -13,6 +13,18 @@ --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); } +html[theme=dark], +body.vscode-dark { + --xr-font-color0: rgba(255, 255, 255, 1); + --xr-font-color2: rgba(255, 255, 255, 0.54); + --xr-font-color3: rgba(255, 255, 255, 0.38); + --xr-border-color: #1F1F1F; + --xr-disabled-color: #515151; + --xr-background-color: #111111; + --xr-background-color-row-even: #111111; + --xr-background-color-row-odd: #313131; +} + .xr-wrap { min-width: 300px; max-width: 700px; From 3e5dd6ef32b9c69806af69a3a5168edcf3b2e21f Mon Sep 17 00:00:00 2001 From: David Brochart Date: Mon, 11 May 2020 16:54:27 +0200 Subject: [PATCH 019/342] Add xarray-leaflet to the visualization projects (#4051) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 57b8da0c447..b02c4be7338 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -75,6 +75,7 @@ Visualization - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. +- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet. Non-Python projects ~~~~~~~~~~~~~~~~~~~ From bd84186acbd84bd386134a5b60111596cee2d8ec Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 12 May 2020 22:47:50 +0000 Subject: [PATCH 020/342] Fix contour when levels is scalar and norm is provided. 
(#3914) Fixes #3735 --- doc/whats-new.rst | 2 ++ xarray/plot/utils.py | 2 +- xarray/tests/test_plot.py | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a1d52b28ed5..0724460b1e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,6 +91,8 @@ Bug fixes By `Deepak Cherian `_ - Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. (:issue:`3734`) By `Deepak Cherian `_ +- Fix plotting when ``levels`` is a scalar and ``norm`` is provided. (:issue:`3735`) + By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index c3512828888..cb993c192d9 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -268,7 +268,7 @@ def _determine_cmap_params( cmap = OPTIONS["cmap_sequential"] # Handle discrete levels - if levels is not None and norm is None: + if levels is not None: if is_scalar(levels): if user_minmax: levels = np.linspace(vmin, vmax, levels) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index bf1f9ed60bb..af7c686bf60 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -854,21 +854,22 @@ def test_norm_sets_vmin_vmax(self): vmin = self.data.min() vmax = self.data.max() - for norm, extend in zip( + for norm, extend, levels in zip( [ + mpl.colors.Normalize(), mpl.colors.Normalize(), mpl.colors.Normalize(vmin + 0.1, vmax - 0.1), mpl.colors.Normalize(None, vmax - 0.1), mpl.colors.Normalize(vmin + 0.1, None), ], - ["neither", "both", "max", "min"], + ["neither", "neither", "both", "max", "min"], + [7, None, None, None, None], ): test_min = vmin if norm.vmin is None else norm.vmin test_max = vmax if norm.vmax is None else norm.vmax - cmap_params = _determine_cmap_params(self.data, norm=norm) - + cmap_params = _determine_cmap_params(self.data, norm=norm, levels=levels) assert cmap_params["vmin"] == test_min assert cmap_params["vmax"] == test_max assert cmap_params["extend"] == extend From c73e9589da06730848a876b1c277bf1ad389372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 13 May 2020 17:51:43 +0200 Subject: [PATCH 021/342] FIX: correct dask array handling in _calc_idxminmax (#3922) * FIX: correct dask array handling in _calc_idxminmax * FIX: remove unneeded import, reformat via black * fix idxmax, idxmin with dask arrays * FIX: use array[dim].data in `_calc_idxminmax` as per @keewis suggestion, attach dim name to result * ADD: add dask tests to `idxmin`/`idxmax` dataarray tests * FIX: add back fixture line removed by accident * ADD: complete dask handling in `idxmin`/`idxmax` tests in test_dataarray, xfail dask tests for dtype dateime64 (M) * ADD: add "support dask handling for idxmin/idxmax" in whats-new.rst * MIN: reintroduce changes added by #3953 * MIN: change if-clause to use `and` instead of `&` as per review-comment * MIN: change if-clause to use `and` instead of `&` as per review-comment * WIP: remove dask handling entirely for debugging purposes * Test for dask computes * WIP: re-add dask handling (map_blocks-approach), add `with raise_if_dask_computes()` context to idxmin-tests * Use dask indexing instead of map_blocks. * Better chunk choice. * Return -1 for _nan_argminmax_object if all NaNs along dim * Revert "Return -1 for _nan_argminmax_object if all NaNs along dim" This reverts commit 58901b9da821a04f2ec085577cb916c4d67f6f50. 
* Raise error for object arrays * No error for object arrays. Instead expect 1 compute in tests. Co-authored-by: dcherian --- doc/whats-new.rst | 3 + xarray/core/computation.py | 23 +++---- xarray/tests/test_dataarray.py | 120 ++++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 36 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0724460b1e5..cd30fab0160 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,6 +53,9 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`) + By `Kai Mühlbauer `_. - More support for unit aware arrays with pint (:pull:`3643`) By `Justus Magin `_. - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even diff --git a/xarray/core/computation.py b/xarray/core/computation.py index a3723ea9db9..28bf818e4a3 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -26,7 +26,6 @@ from . import dtypes, duck_array_ops, utils from .alignment import deep_align from .merge import merge_coordinates_without_align -from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1380,24 +1379,24 @@ def _calc_idxminmax( # This will run argmin or argmax. indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) - # Get the coordinate we want. - coordarray = array[dim] - # Handle dask arrays. - if isinstance(array, dask_array_type): - res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + if isinstance(array.data, dask_array_type): + import dask.array + + chunks = dict(zip(array.dims, array.chunks)) + dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim]) + res = indx.copy(data=dask_coord[(indx.data,)]) + # we need to attach back the dim name + res.name = dim else: - res = coordarray[ - indx, - ] + res = array[dim][(indx,)] + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] if skipna or (skipna is None and array.dtype.kind in na_dtypes): # Put the NaN values back in after removing them res = res.where(~allna, fill_value) - # The dim is gone but we need to remove the corresponding coordinate. 
- del res.coords[dim] - # Copy attributes from argmin/argmax, if any res.attrs = indx.attrs diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6984d5361d2..a01234616a4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -34,6 +34,8 @@ source_ndarray, ) +from .test_dask import raise_if_dask_computes + class TestDataArray: @pytest.fixture(autouse=True) @@ -4524,11 +4526,21 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) - def test_idxmin(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmin' breaks when dtype is datetime64 (M)") + ar0_raw = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # dim doesn't exist with pytest.raises(KeyError): ar0.idxmin(dim="spam") @@ -4620,11 +4632,21 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): result7 = ar0.idxmin(fill_value=-1j) assert_identical(result7, expected7) - def test_idxmax(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmax' breaks when dtype is datetime64 (M)") + ar0_raw = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # dim doesn't exist with pytest.raises(KeyError): ar0.idxmax(dim="spam") @@ -4944,14 +4966,31 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result3, expected2) - def test_idxmin(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmin' breaks when dtype is datetime64 (M)") + + if x.dtype.kind == "O": + # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices. 
+ max_computes = 1 + else: + max_computes = 0 + + ar0_raw = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + assert_identical(ar0, ar0) # No dimension specified @@ -4982,15 +5021,18 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected0.name = "x" # Default fill value (NaN) - result0 = ar0.idxmin(dim="x") + with raise_if_dask_computes(max_computes=max_computes): + result0 = ar0.idxmin(dim="x") assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmin(dim="x", fill_value=np.NaN) + with raise_if_dask_computes(max_computes=max_computes): + result1 = ar0.idxmin(dim="x", fill_value=np.NaN) assert_identical(result1, expected0) # keep_attrs - result2 = ar0.idxmin(dim="x", keep_attrs=True) + with raise_if_dask_computes(max_computes=max_computes): + result2 = ar0.idxmin(dim="x", keep_attrs=True) expected2 = expected0.copy() expected2.attrs = self.attrs assert_identical(result2, expected2) @@ -5008,11 +5050,13 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected3.name = "x" expected3.attrs = {} - result3 = ar0.idxmin(dim="x", skipna=False) + with raise_if_dask_computes(max_computes=max_computes): + result3 = ar0.idxmin(dim="x", skipna=False) assert_identical(result3, expected3) # fill_value should be ignored with skipna=False - result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j) + with raise_if_dask_computes(max_computes=max_computes): + result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j) assert_identical(result4, expected3) # Float fill_value @@ -5024,7 +5068,8 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected5 = xr.concat(expected5, dim="y") expected5.name = "x" - result5 = ar0.idxmin(dim="x", fill_value=-1.1) + with raise_if_dask_computes(max_computes=max_computes): + result5 = ar0.idxmin(dim="x", fill_value=-1.1) assert_identical(result5, expected5) # Integer fill_value @@ -5036,7 +5081,8 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected6 = xr.concat(expected6, dim="y") expected6.name = "x" - result6 = ar0.idxmin(dim="x", fill_value=-1) + with raise_if_dask_computes(max_computes=max_computes): + result6 = ar0.idxmin(dim="x", fill_value=-1) assert_identical(result6, expected6) # Complex fill_value @@ -5048,17 +5094,35 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected7 = xr.concat(expected7, dim="y") expected7.name = "x" - result7 = ar0.idxmin(dim="x", fill_value=-5j) + with raise_if_dask_computes(max_computes=max_computes): + result7 = ar0.idxmin(dim="x", fill_value=-5j) assert_identical(result7, expected7) - def test_idxmax(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmax' breaks when dtype is datetime64 (M)") + + if x.dtype.kind == "O": + # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices. 
+ max_computes = 1 + else: + max_computes = 0 + + ar0_raw = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # No dimension specified with pytest.raises(ValueError): ar0.idxmax() @@ -5090,15 +5154,18 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected0.name = "x" # Default fill value (NaN) - result0 = ar0.idxmax(dim="x") + with raise_if_dask_computes(max_computes=max_computes): + result0 = ar0.idxmax(dim="x") assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmax(dim="x", fill_value=np.NaN) + with raise_if_dask_computes(max_computes=max_computes): + result1 = ar0.idxmax(dim="x", fill_value=np.NaN) assert_identical(result1, expected0) # keep_attrs - result2 = ar0.idxmax(dim="x", keep_attrs=True) + with raise_if_dask_computes(max_computes=max_computes): + result2 = ar0.idxmax(dim="x", keep_attrs=True) expected2 = expected0.copy() expected2.attrs = self.attrs assert_identical(result2, expected2) @@ -5116,11 +5183,13 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected3.name = "x" expected3.attrs = {} - result3 = ar0.idxmax(dim="x", skipna=False) + with raise_if_dask_computes(max_computes=max_computes): + result3 = ar0.idxmax(dim="x", skipna=False) assert_identical(result3, expected3) # fill_value should be ignored with skipna=False - result4 = ar0.idxmax(dim="x", skipna=False, fill_value=-100j) + with raise_if_dask_computes(max_computes=max_computes): + result4 = ar0.idxmax(dim="x", skipna=False, fill_value=-100j) assert_identical(result4, expected3) # Float fill_value @@ -5132,7 +5201,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected5 = xr.concat(expected5, dim="y") expected5.name = "x" - result5 = ar0.idxmax(dim="x", fill_value=-1.1) + with raise_if_dask_computes(max_computes=max_computes): + result5 = ar0.idxmax(dim="x", fill_value=-1.1) assert_identical(result5, expected5) # Integer fill_value @@ -5144,7 +5214,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected6 = xr.concat(expected6, dim="y") expected6.name = "x" - result6 = ar0.idxmax(dim="x", fill_value=-1) + with raise_if_dask_computes(max_computes=max_computes): + result6 = ar0.idxmax(dim="x", fill_value=-1) assert_identical(result6, expected6) # Complex fill_value @@ -5156,7 +5227,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected7 = xr.concat(expected7, dim="y") expected7.name = "x" - result7 = ar0.idxmax(dim="x", fill_value=-5j) + with raise_if_dask_computes(max_computes=max_computes): + result7 = ar0.idxmax(dim="x", fill_value=-5j) assert_identical(result7, expected7) From 8051c47686ae23062f9008785563d62327735002 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 13 May 2020 19:35:45 +0200 Subject: [PATCH 022/342] fix the failing flake8 CI (#4057) * rename d and l to dim and length --- xarray/backends/common.py | 2 +- xarray/backends/memory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fa3ee19f542..63c4c956f86 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -241,7 +241,7 @@ def encode_attribute(self, a): """encode one attribute""" return a - def set_dimension(self, d, l): # pragma: no cover + def set_dimension(self, dim, length): # pragma: no cover raise NotImplementedError() def set_attribute(self, k, v): # pragma: no cover diff --git 
a/xarray/backends/memory.py b/xarray/backends/memory.py index bee6521bce2..17095d09651 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -40,6 +40,6 @@ def set_attribute(self, k, v): # copy to imitate writing to disk. self._attributes[k] = copy.deepcopy(v) - def set_dimension(self, d, l, unlimited_dims=None): + def set_dimension(self, dim, length, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass From 2542a63f6ebed1a464af7fc74b9f3bf302925803 Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Thu, 14 May 2020 14:28:54 +0000 Subject: [PATCH 023/342] Fixed typo in rasterio docs (#4063) --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 738d8d2b7ab..27e182906a4 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -804,7 +804,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [4]: rds.rio.crs Out[4]: CRS.from_epsg(32618) - In [5]: rds4326 = rio.rio.reproject("epsg:4326") + In [5]: rds4326 = rds.rio.reproject("epsg:4326") In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) From 742d00076c8e79cb753b4b4856dbbef5f52878c6 Mon Sep 17 00:00:00 2001 From: aurghs <35919497+aurghs@users.noreply.github.com> Date: Tue, 19 May 2020 17:43:53 +0200 Subject: [PATCH 024/342] #1621 optional decode timedelta (#4071) * add decode_timedelta kwarg in decode_cf and open_* functions and test. * Fix style issue * Add chang author reference * removed check decode_timedelta in open_dataset * fix docstring indentation * fix: force dtype in test decode_timedelta --- doc/whats-new.rst | 6 ++++++ xarray/backends/api.py | 16 +++++++++++++++ xarray/backends/zarr.py | 8 ++++++++ xarray/conventions.py | 21 ++++++++++++++----- xarray/tests/test_conventions.py | 35 ++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cd30fab0160..f5b6f2f3e86 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -74,6 +74,12 @@ New Features where the result of a computation could not be inferred automatically. By `Deepak Cherian `_ +- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`, + (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`, + :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas + independently of time decoding (:issue:`1621`) + `Aureliana Barghini ` + Bug fixes ~~~~~~~~~ - Support dark mode in VS code (:issue:`4024`) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 184aad579a2..0919d2a582b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -303,6 +303,7 @@ def open_dataset( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open and decode a dataset from a file or file-like object. @@ -383,6 +384,11 @@ def open_dataset( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. 
Returns ------- @@ -435,6 +441,7 @@ def open_dataset( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False if cache is None: cache = chunks is None @@ -451,6 +458,7 @@ def maybe_decode_store(store, lock=False): decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) _protect_dataset_variables_inplace(ds, cache) @@ -477,6 +485,7 @@ def maybe_decode_store(store, lock=False): chunks, drop_variables, use_cftime, + decode_timedelta, ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -561,6 +570,7 @@ def open_dataarray( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open an DataArray from a file or file-like object containing a single data variable. @@ -640,6 +650,11 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Notes ----- @@ -671,6 +686,7 @@ def open_dataarray( drop_variables=drop_variables, backend_kwargs=backend_kwargs, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if len(dataset.data_vars) != 1: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index de6b627447e..540759a1c4c 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -503,6 +503,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, + decode_timedelta=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -562,6 +563,11 @@ def open_zarr( consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- @@ -612,6 +618,7 @@ def open_zarr( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( @@ -621,6 +628,7 @@ def maybe_decode_store(store, lock=False): concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, + decode_timedelta=decode_timedelta, ) # TODO: this is where we would apply caching diff --git a/xarray/conventions.py b/xarray/conventions.py index df24d0d3d8d..588fcea71a3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -266,6 +266,7 @@ def decode_cf_variable( decode_endianness=True, stack_char_dim=True, use_cftime=None, + decode_timedelta=None, ): """ Decodes a variable which may hold CF encoded information. 
@@ -315,6 +316,9 @@ def decode_cf_variable( var = as_variable(var) original_dtype = var.dtype + if decode_timedelta is None: + decode_timedelta = decode_times + if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) @@ -328,12 +332,10 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - for coder in [ - times.CFTimedeltaCoder(), - times.CFDatetimeCoder(use_cftime=use_cftime), - ]: - var = coder.decode(var, name=name) + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) # TODO(shoyer): convert everything below to use coders @@ -442,6 +444,7 @@ def decode_cf_variables( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """ Decode several CF encoded variables. @@ -492,6 +495,7 @@ def stackable(dim): decode_times=decode_times, stack_char_dim=stack_char_dim, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if decode_coords: var_attrs = new_vars[k].attrs @@ -518,6 +522,7 @@ def decode_cf( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -552,6 +557,11 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. 
Returns ------- @@ -583,6 +593,7 @@ def decode_cf( decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index acb2400ea04..dfd20a303ff 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -311,6 +311,41 @@ def test_decode_dask_times(self): conventions.decode_cf(original).chunk(), ) + def test_decode_cf_time_kwargs(self): + ds = Dataset.from_dict( + { + "coords": { + "timedelta": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "timedelta", + "attrs": {"units": "days"}, + }, + "time": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "time", + "attrs": {"units": "days since 2000-01-01"}, + }, + }, + "dims": {"time": 3, "timedelta": 3}, + "data_vars": { + "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))}, + }, + } + ) + + dsc = conventions.decode_cf(ds) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("int64") + dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("int64") + class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): def encode_variable(self, var): From f38b0c15aa90c6812118047ae637ee67048db51f Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 19 May 2020 18:49:25 +0200 Subject: [PATCH 025/342] remove the backslash escapes and typehint fragments in the API docs (#4070) * remove numpydoc which is the reason for the backslash-escaped stars * don't install numpydoc --- ci/requirements/doc.yml | 3 +-- doc/conf.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 2987303c92a..6caebc46cdf 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -17,11 +17,10 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 - - numpydoc - pandas>=1.0 - rasterio>=1.1 - seaborn - setuptools - sphinx>=2.3 - sphinx_rtd_theme>=0.4 - - zarr>=2.4 \ No newline at end of file + - zarr>=2.4 diff --git a/doc/conf.py b/doc/conf.py index 578f9cf550d..5d304dab362 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -79,7 +79,6 @@ "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "numpydoc", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", From 261df2e56b2d554927887b8943f84514fc60369b Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Wed, 20 May 2020 13:04:01 -0400 Subject: [PATCH 026/342] Document Xarray zarr encoding conventions (#4047) * document zarr encoding * link to zarr spec * fix typo [ci skip] --- doc/internals.rst | 50 +++++++++++++++++++++++++++++++++++++++++++++++ doc/io.rst | 6 ++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/doc/internals.rst b/doc/internals.rst index c23aab8c5d7..27c7c4e1d87 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -138,3 +138,53 @@ To help users keep things straight, please `let us know `_ if you plan to write a new 
accessor for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. + +.. _zarr_encoding: + +Zarr Encoding Specification +--------------------------- + +In implementing support for the `Zarr `_ storage +format, Xarray developers made some *ad hoc* choices about how to store +NetCDF data in Zarr. +Future versions of the Zarr spec will likely include a more formal convention +for the storage of the NetCDF data model in Zarr; see +`Zarr spec repo `_ for ongoing +discussion. + +First, Xarray can only read and write Zarr groups. There is currently no support +for reading / writting individual Zarr arrays. Zarr groups are mapped to +Xarray ``Dataset`` objects. + +Second, from Xarray's point of view, the key difference between +NetCDF and Zarr is that all NetCDF arrays have *dimension names* while Zarr +arrays do not. Therefore, in order to store NetCDF data in Zarr, Xarray must +somehow encode and decode the name of each array's dimensions. + +To accomplish this, Xarray developers decided to define a special Zarr array +attribute: ``_ARRAY_DIMENSIONS``. The value of this attribute is a list of +dimension names (strings), for example ``["time", "lon", "lat"]``. When writing +data to Zarr, Xarray sets this attribute on all variables based on the variable +dimensions. When reading a Zarr group, Xarray looks for this attribute on all +arrays, raising an error if it can't be found. The attribute is used to define +the variable dimension names and then removed from the attributes dictionary +returned to the user. + +Because of these choices, Xarray cannot read arbitrary array data, but only +Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array. + +After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable +dimensions, Xarray proceeds to [optionally] decode each variable using its +standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`). + +As a concrete example, here we write a tutorial dataset to Zarr and then +re-open it directly with Zarr: + +.. ipython:: python + + ds = xr.tutorial.load_dataset('rasm') + ds.to_zarr('rasm.zarr', mode='w') + import zarr + zgroup = zarr.open('rasm.zarr') + print(zgroup.tree()) + dict(zgroup['Tair'].attrs) diff --git a/doc/io.rst b/doc/io.rst index 27e182906a4..1f854586202 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -463,7 +463,7 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets. Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ -The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't +The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is availabe through :py:meth:`DataArray.to_netcdf` and @@ -837,7 +837,9 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. +xarray. For implementation details, see :ref:`zarr_encoding`. + +To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory .. 
ipython:: python From cb90d5542bd6868d5548ae8efb5815c249c2c329 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 20 May 2020 19:06:39 +0200 Subject: [PATCH 027/342] Fix html repr in untrusted notebooks (plain text fallback) (#4053) * add html pre element with text repr as fallback The PRE element is not displayed when CSS is injected. When CSS is not injected (untrusted notebook), the PRE element is shown but not the DIV container used for the HTML repr. * remove title elements in svg icons Prevent showing those when fallback to plain text repr. A title tag is already present in the HTML label elements. * add basic test * update what's new --- doc/whats-new.rst | 2 ++ xarray/core/formatting_html.py | 14 ++++++++++---- xarray/static/css/style.css | 6 ++++++ xarray/static/html/icons-svg-inline.html | 2 -- xarray/tests/test_formatting_html.py | 7 +++++++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f5b6f2f3e86..dab48383c55 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -116,6 +116,8 @@ Bug fixes - Fix bug in time parsing failing to fall back to cftime. This was causing time variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) By `Ryan May `_. +- Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) + By `Benoit Bovy `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 6e345582ed0..69832d6ca3d 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -222,14 +222,20 @@ def array_section(obj): ) -def _obj_repr(header_components, sections): +def _obj_repr(obj, header_components, sections): + """Return HTML repr of an xarray object. + + If CSS is not injected (untrusted notebook), fallback to the plain text repr. + + """ header = f"
<div class='xr-header'>{''.join(h for h in header_components)}</div>"
     sections = "".join(f"<li class='xr-section-item'>{s}</li>" for s in sections)
 
     return (
         "<div>"
+        f"<pre class='xr-text-repr-fallback'>{escape(repr(obj))}</pre>"
    " + "" @@ -257,7 +263,7 @@ def array_repr(arr): sections.append(attr_section(arr.attrs)) - return _obj_repr(header_components, sections) + return _obj_repr(arr, header_components, sections) def dataset_repr(ds): @@ -272,4 +278,4 @@ def dataset_repr(ds): attr_section(ds.attrs), ] - return _obj_repr(header_components, sections) + return _obj_repr(ds, header_components, sections) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index acfe85d5ac7..39cd6d6755f 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -26,10 +26,16 @@ body.vscode-dark { } .xr-wrap { + display: block; min-width: 300px; max-width: 700px; } +.xr-text-repr-fallback { + /* fallback to plain text repr when CSS is not injected (untrusted notebook) */ + display: none; +} + .xr-header { padding-top: 6px; padding-bottom: 6px; diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html index c44f89c4304..b0e837a26cd 100644 --- a/xarray/static/html/icons-svg-inline.html +++ b/xarray/static/html/icons-svg-inline.html @@ -1,13 +1,11 @@ -Show/Hide data repr -Show/Hide attributes diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 94653016416..90e74f1f78f 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -139,6 +139,13 @@ def test_repr_of_dataset(dataset): assert "<IA>" in formatted +def test_repr_text_fallback(dataset): + formatted = fh.dataset_repr(dataset) + + # Just test that the "pre" block used for fallback to plain text is present. + assert "
    " in formatted
    +
    +
     def test_variable_repr_html():
         v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
         assert hasattr(v, "_repr_html_")
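
A quick way to see the fallback added in this patch (assuming an xarray build that already
includes it; the dataset below is arbitrary):

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})
    html = ds._repr_html_()

    # Both representations are embedded: the interactive repr inside the styled
    # divs, and the escaped plain-text repr inside the fallback pre element.
    # The injected CSS hides the fallback; without CSS only the fallback shows.
    assert "<pre class='xr-text-repr-fallback'>" in html
    assert "xarray.Dataset" in html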
    
    From 5c04ebfde12a70913d28ffa70e9e13c0b992dfa9 Mon Sep 17 00:00:00 2001
    From: Yunus Sevinchan 
    Date: Wed, 20 May 2020 19:08:23 +0200
    Subject: [PATCH 028/342] Add NetCDF3 dtype coercion for unsigned integer types
     (#4018)
    
    * In netcdf3 backend, also coerce unsigned integer dtypes
    
* Adjust test for netcdf3 roundtrip to include coercion
    
    This might be a bit too general for what is required at this point,
    though ... :thinking:
    
    * Add test for failing dtype coercion
    
    * Add What's New entry for issue #4014 and PR #4018
    
    * Move netcdf3-specific test to NetCDF3Only class
    
    Also uses a class variable for definition of netcdf3 formats now.
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst             |  3 +++
     xarray/backends/netcdf3.py    | 26 ++++++++++++++++++-------
     xarray/tests/test_backends.py | 36 ++++++++++++++++++++++++++++++-----
     3 files changed, 53 insertions(+), 12 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index dab48383c55..447aaf5b0bf 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -70,6 +70,9 @@ New Features
       the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`).  This
       feature requires cftime version 1.1.0 or greater.  By
       `Spencer Clark `_.
    +- For the netCDF3 backend, added dtype coercions for unsigned integer types.
    +  (:issue:`4014`, :pull:`4018`)
    +  By `Yunus Sevinchan `_
     - :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases
       where the result of a computation could not be inferred automatically.
       By `Deepak Cherian `_
    diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
    index c9c4baf9b01..51d7fce22a0 100644
    --- a/xarray/backends/netcdf3.py
    +++ b/xarray/backends/netcdf3.py
    @@ -28,7 +28,14 @@
     
     # These data-types aren't supported by netCDF3, so they are automatically
     # coerced instead as indicated by the "coerce_nc3_dtype" function
    -_nc3_dtype_coercions = {"int64": "int32", "bool": "int8"}
    +_nc3_dtype_coercions = {
    +    "int64": "int32",
    +    "uint64": "int32",
    +    "uint32": "int32",
    +    "uint16": "int16",
    +    "uint8": "int8",
    +    "bool": "int8",
    +}
     
     # encode all strings as UTF-8
     STRING_ENCODING = "utf-8"
    @@ -37,12 +44,17 @@
     def coerce_nc3_dtype(arr):
         """Coerce an array to a data type that can be stored in a netCDF-3 file
     
    -    This function performs the following dtype conversions:
    -        int64 -> int32
    -        bool -> int8
    -
    -    Data is checked for equality, or equivalence (non-NaN values) with
    -    `np.allclose` with the default keyword arguments.
    +    This function performs the dtype conversions as specified by the
    +    ``_nc3_dtype_coercions`` mapping:
    +        int64  -> int32
    +        uint64 -> int32
    +        uint32 -> int32
    +        uint16 -> int16
    +        uint8  -> int8
    +        bool   -> int8
    +
+    Data is checked for equality, or equivalence (non-NaN values), using
+    ``(cast_array == original_array).all()``.
         """
         dtype = str(arr.dtype)
         if dtype in _nc3_dtype_coercions:
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 90deea51d2a..280b8bff63d 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -30,6 +30,7 @@
         save_mfdataset,
     )
     from xarray.backends.common import robust_getitem
    +from xarray.backends.netcdf3 import _nc3_dtype_coercions
     from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
     from xarray.backends.pydap_ import PydapDataStore
     from xarray.coding.variables import SerializationWarning
    @@ -227,7 +228,27 @@ def __getitem__(self, key):
     
     
     class NetCDF3Only:
    -    pass
    +    netcdf3_formats = ("NETCDF3_CLASSIC", "NETCDF3_64BIT")
    +
    +    @requires_scipy
    +    def test_dtype_coercion_error(self):
    +        """Failing dtype coercion should lead to an error"""
    +        for dtype, format in itertools.product(
    +            _nc3_dtype_coercions, self.netcdf3_formats
    +        ):
    +            if dtype == "bool":
    +                # coerced upcast (bool to int8) ==> can never fail
    +                continue
    +
    +            # Using the largest representable value, create some data that will
    +            # no longer compare equal after the coerced downcast
    +            maxval = np.iinfo(dtype).max
    +            x = np.array([0, 1, 2, maxval], dtype=dtype)
    +            ds = Dataset({"x": ("t", x, {})})
    +
    +            with create_tmp_file(allow_cleanup_failure=False) as path:
    +                with pytest.raises(ValueError, match="could not safely cast"):
    +                    ds.to_netcdf(path, format=format)
     
     
     class DatasetIOBase:
    @@ -296,9 +317,14 @@ def test_write_store(self):
         def check_dtypes_roundtripped(self, expected, actual):
             for k in expected.variables:
                 expected_dtype = expected.variables[k].dtype
    -            if isinstance(self, NetCDF3Only) and expected_dtype == "int64":
    -                # downcast
    -                expected_dtype = np.dtype("int32")
    +
    +            # For NetCDF3, the backend should perform dtype coercion
    +            if (
    +                isinstance(self, NetCDF3Only)
    +                and str(expected_dtype) in _nc3_dtype_coercions
    +            ):
    +                expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)])
    +
                 actual_dtype = actual.variables[k].dtype
                 # TODO: check expected behavior for string dtypes more carefully
                 string_kinds = {"O", "S", "U"}
    @@ -2156,7 +2182,7 @@ def test_cross_engine_read_write_netcdf3(self):
                 valid_engines.add("scipy")
     
             for write_engine in valid_engines:
    -            for format in ["NETCDF3_CLASSIC", "NETCDF3_64BIT"]:
    +            for format in self.netcdf3_formats:
                     with create_tmp_file() as tmp_file:
                         data.to_netcdf(tmp_file, format=format, engine=write_engine)
                         for read_engine in valid_engines:
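
As a rough sketch of the coercion rule this patch adds (illustrative only;
``coerce_for_nc3`` is a stand-in, not the xarray function):

    import numpy as np

    coercions = {"int64": "int32", "uint64": "int32", "uint32": "int32",
                 "uint16": "int16", "uint8": "int8", "bool": "int8"}

    def coerce_for_nc3(arr):
        # cast to the netCDF3-compatible dtype, then require a lossless round trip
        target = coercions.get(str(arr.dtype))
        if target is None:
            return arr
        cast = arr.astype(target)
        if not (cast == arr).all():
            raise ValueError(f"could not safely cast array from {arr.dtype} to {target}")
        return cast

    coerce_for_nc3(np.array([0, 1, 2], dtype="uint16"))      # coerced to int16
    # coerce_for_nc3(np.array([2**16 - 1], dtype="uint16"))  # would raise ValueError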
    
    From 484d1ce5ff8969b6ca6fa942b344379725f33b9c Mon Sep 17 00:00:00 2001
    From: =?UTF-8?q?Aur=C3=A9lien=20Ponte?= 
    Date: Wed, 20 May 2020 20:55:32 +0200
    Subject: [PATCH 029/342] improve to_zarr doc about chunking (#4048)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    * Update dataset.py
    
    * attempt at improving the doc formulation
    
    * update to_zarr docstring
    
    * minor style update
    
    * seems to fix doc compilation locally
    
    * delete saved_on_disk.nc
    
    Co-authored-by: Aurélien Ponte 
    ---
     xarray/core/dataset.py | 9 +++++++++
     1 file changed, 9 insertions(+)
    
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 3a55f3eca27..d93f4044a6d 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -1604,6 +1604,15 @@ def to_zarr(
             References
             ----------
             https://zarr.readthedocs.io/
    +
    +        Notes
    +        -----
    +        Zarr chunking behavior:
    +            If chunks are found in the encoding argument or attribute
    +            corresponding to any DataArray, those chunks are used.
    +            If a DataArray is a dask array, it is written with those chunks.
+            If no other chunks are found, Zarr uses its own heuristics to
+            choose automatic chunk sizes.
             """
             if encoding is None:
                 encoding = {}
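
To make the chunking rules documented above concrete, a small sketch (store paths are
arbitrary; requires the zarr and dask packages):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(100))})

    # chunks given in encoding are used
    ds.to_zarr("explicit.zarr", mode="w", encoding={"a": {"chunks": (25,)}})

    # a dask-backed variable is written with its dask chunks
    ds.chunk({"x": 10}).to_zarr("dask_chunks.zarr", mode="w")

    # otherwise Zarr falls back to its own automatic chunk sizes
    ds.to_zarr("auto.zarr", mode="w")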
    
    From 19b088636eb7d3f65ab7a1046ac672e0689371d8 Mon Sep 17 00:00:00 2001
    From: Prajjwal Nijhara 
    Date: Sat, 23 May 2020 23:00:50 +0530
    Subject: [PATCH 030/342] fix dangerous default arguments (#4006)
    
    Co-authored-by: Keewis 
    ---
     xarray/tests/test_backends.py    | 15 +++++++++++++--
     xarray/tests/test_conventions.py |  6 +++++-
     2 files changed, 18 insertions(+), 3 deletions(-)
    
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 280b8bff63d..49a39474b54 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -87,6 +87,7 @@
         dask_version = "10.0"
     
     ON_WINDOWS = sys.platform == "win32"
    +default_value = object()
     
     
     def open_example_dataset(name, *args, **kwargs):
    @@ -3630,11 +3631,21 @@ def create_tmp_geotiff(
         ny=3,
         nz=3,
         transform=None,
    -    transform_args=[5000, 80000, 1000, 2000.0],
    -    crs={"units": "m", "no_defs": True, "ellps": "WGS84", "proj": "utm", "zone": 18},
    +    transform_args=default_value,
    +    crs=default_value,
         open_kwargs=None,
         additional_attrs=None,
     ):
    +    if transform_args is default_value:
    +        transform_args = [5000, 80000, 1000, 2000.0]
    +    if crs is default_value:
    +        crs = {
    +            "units": "m",
    +            "no_defs": True,
    +            "ellps": "WGS84",
    +            "proj": "utm",
    +            "zone": 18,
    +        }
         # yields a temporary geotiff file and a corresponding expected DataArray
         import rasterio
         from rasterio.transform import from_origin
    diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
    index dfd20a303ff..a5f4324d182 100644
    --- a/xarray/tests/test_conventions.py
    +++ b/xarray/tests/test_conventions.py
    @@ -363,8 +363,12 @@ def create_store(self):
     
         @contextlib.contextmanager
         def roundtrip(
    -        self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False
    +        self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False
         ):
    +        if save_kwargs is None:
    +            save_kwargs = {}
    +        if open_kwargs is None:
    +            open_kwargs = {}
             store = CFEncodedInMemoryStore()
             data.dump_to_store(store, **save_kwargs)
             yield open_dataset(store, **open_kwargs)
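
The defaults are "dangerous" because Python evaluates default values once, at function
definition time, so a mutable default is shared across calls; a minimal illustration of
the pitfall and of the sentinel pattern used in this patch:

    # shared mutable default: state leaks between calls
    def append_bad(item, items=[]):
        items.append(item)
        return items

    append_bad(1)  # [1]
    append_bad(2)  # [1, 2]

    # the fix: a unique sentinel (or None), replaced inside the function body
    default_value = object()

    def append_good(item, items=default_value):
        if items is default_value:
            items = []
        items.append(item)
        return items

    append_good(1)  # [1]
    append_good(2)  # [2]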
    
    From f3ffab7ee4593c97e2ae63f22140d0a823a64b6d Mon Sep 17 00:00:00 2001
    From: Mathias Hauser 
    Date: Sat, 23 May 2020 23:06:18 +0200
    Subject: [PATCH 031/342] Fix bool weights (#4075)
    
    * add tests
    
    * weights: bool -> int
    
    * whats new
    
    * Apply suggestions from code review
    
* avoid unnecessary copy
    
    Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    ---
     doc/whats-new.rst             |  4 +++-
     xarray/core/weighted.py       |  9 ++++++++-
     xarray/tests/test_weighted.py | 23 +++++++++++++++++++++++
     3 files changed, 34 insertions(+), 2 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 447aaf5b0bf..a4602c1edad 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -119,6 +119,8 @@ Bug fixes
     - Fix bug in time parsing failing to fall back to cftime. This was causing time
       variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`)
       By `Ryan May `_.
    +- Fix weighted mean when passing boolean weights (:issue:`4074`).
    +  By `Mathias Hauser `_.
     - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`)
       By `Benoit Bovy `_.
     
    @@ -186,7 +188,7 @@ New Features
     
     - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted`
       and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`).
    -  By `Mathias Hauser `_
    +  By `Mathias Hauser `_.
     - The new jupyter notebook repr (``Dataset._repr_html_`` and
       ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To
       disable, use ``xarray.set_options(display_style="text")``.
    diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
    index 996d2e4c43e..21ed06ea85f 100644
    --- a/xarray/core/weighted.py
    +++ b/xarray/core/weighted.py
    @@ -142,7 +142,14 @@ def _sum_of_weights(
             # we need to mask data values that are nan; else the weights are wrong
             mask = da.notnull()
     
    -        sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False)
    +        # bool -> int, because ``xr.dot([True, True], [True, True])`` -> True
    +        # (and not 2); GH4074
    +        if self.weights.dtype == bool:
    +            sum_of_weights = self._reduce(
    +                mask, self.weights.astype(int), dim=dim, skipna=False
    +            )
    +        else:
    +            sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False)
     
             # 0-weights are not valid
             valid_weights = sum_of_weights != 0.0
    diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py
    index 24531215dfb..1bf685cc95d 100644
    --- a/xarray/tests/test_weighted.py
    +++ b/xarray/tests/test_weighted.py
    @@ -59,6 +59,18 @@ def test_weighted_sum_of_weights_nan(weights, expected):
         assert_equal(expected, result)
     
     
    +def test_weighted_sum_of_weights_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +
    +    da = DataArray([1, 2])
    +    weights = DataArray([True, True])
    +    result = da.weighted(weights).sum_of_weights()
    +
    +    expected = DataArray(2)
    +
    +    assert_equal(expected, result)
    +
    +
     @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan]))
     @pytest.mark.parametrize("factor", [0, 1, 3.14])
     @pytest.mark.parametrize("skipna", (True, False))
    @@ -158,6 +170,17 @@ def test_weighted_mean_nan(weights, expected, skipna):
         assert_equal(expected, result)
     
     
    +def test_weighted_mean_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +    da = DataArray([1, 1])
    +    weights = DataArray([True, True])
    +    expected = DataArray(1)
    +
    +    result = da.weighted(weights).mean()
    +
    +    assert_equal(expected, result)
    +
    +
     def expected_weighted(da, weights, dim, skipna, operation):
         """
         Generate expected result using ``*`` and ``sum``. This is checked against
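
The underlying issue is that a dot product of boolean arrays stays boolean, so the sum of
weights collapses to ``True`` instead of a count; with the cast added above, boolean
weights behave like 0/1 weights. A small check, assuming a build that includes this fix:

    import numpy as np
    import xarray as xr

    np.dot([True, True], [True, True])    # True, not 2 -- same effect as xr.dot

    da = xr.DataArray([2.0, 4.0])
    weights = xr.DataArray([True, True])
    da.weighted(weights).mean()            # now 3.0, with sum_of_weights == 2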
    
    From bdb1d331ac685fbc1371a3b98a795545e1682e7e Mon Sep 17 00:00:00 2001
    From: Mathias Hauser 
    Date: Mon, 25 May 2020 18:32:14 +0200
    Subject: [PATCH 032/342] allow multiindex levels in plots (#3938)
    
    * allow multiindex levels in plots
    
    * query label for test
    
    * 2D plts adapt err msg
    
    * 1D plts adapt err msg
    
    * add errmsg x==y
    
    * WIP _assert_xy_valid
    
    * _assert_valid_xy
    
    * add 1D example
    
    * update docs
    
    * simplify error msg
    
    * remove '
    
    * Apply suggestions from code review
    ---
     doc/plotting.rst          | 40 +++++++++++++++++++-
     doc/whats-new.rst         |  2 +
     xarray/plot/plot.py       | 19 +++++-----
     xarray/plot/utils.py      | 39 ++++++++++++++++----
     xarray/tests/test_plot.py | 77 ++++++++++++++++++++++++++++++++-------
     5 files changed, 145 insertions(+), 32 deletions(-)
    
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index 40c0ca1a496..f98f47f2567 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -13,7 +13,7 @@ labels can also be used to easily create informative plots.
     xarray's plotting capabilities are centered around
     :py:class:`DataArray` objects.
     To plot :py:class:`Dataset` objects
    -simply access the relevant DataArrays, ie ``dset['var1']``.
    +simply access the relevant DataArrays, i.e. ``dset['var1']``.
     Dataset specific plotting routines are also available (see :ref:`plot-dataset`).
     Here we focus mostly on arrays 2d or larger. If your data fits
     nicely into a pandas DataFrame then you're better off using one of the more
    @@ -209,6 +209,44 @@ entire figure (as for matplotlib's ``figsize`` argument).
     
     .. _plotting.multiplelines:
     
    +=========================
    + Determine x-axis values
    +=========================
    +
+By default, dimension coordinates are used for the x-axis (here the time coordinates).
    +However, you can also use non-dimension coordinates, MultiIndex levels, and dimensions
    +without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch)
    +from the time and assign it as a non-dimension coordinate:
    +
    +.. ipython:: python
    +
    +    decimal_day = (air1d.time - air1d.time[0]) /  pd.Timedelta('1d')
    +    air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day))
    +    air1d_multi
    +
+To use ``'decimal_day'`` as the x coordinate, it must be explicitly specified:
    +
    +.. ipython:: python
    +
    +    air1d_multi.plot(x="decimal_day")
    +
    +Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``,
    +it is also possible to use a MultiIndex level as x-axis:
    +
    +.. ipython:: python
    +
    +    air1d_multi = air1d_multi.set_index(date=("time", "decimal_day"))
    +    air1d_multi.plot(x="decimal_day")
    +
+Finally, if a dataset does not have any coordinates, it enumerates all data points:
    +
    +.. ipython:: python
    +
    +    air1d_multi = air1d_multi.drop("date")
    +    air1d_multi.plot()
    +
    +The same applies to 2D plots below.
    +
     ====================================================
      Multiple lines showing variation along a dimension
     ====================================================
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a4602c1edad..0be988da690 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -63,6 +63,8 @@ New Features
       By `Stephan Hoyer `_.
     - Allow plotting of boolean arrays. (:pull:`3766`)
       By `Marek Jacob `_
+- Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`).
    +  By `Mathias Hauser `_.
     - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
       the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
       returns the days in the month each datetime in the index.  Now days in month
    diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
    index 4d6033bf00d..19a3f1e63e3 100644
    --- a/xarray/plot/plot.py
    +++ b/xarray/plot/plot.py
    @@ -14,6 +14,7 @@
     from .facetgrid import _easy_facetgrid
     from .utils import (
         _add_colorbar,
    +    _assert_valid_xy,
         _ensure_plottable,
         _infer_interval_breaks,
         _infer_xy_labels,
    @@ -29,19 +30,17 @@
     
     
     def _infer_line_data(darray, x, y, hue):
    -    error_msg = "must be either None or one of ({:s})".format(
    -        ", ".join(repr(dd) for dd in darray.dims)
    -    )
    +
         ndims = len(darray.dims)
     
    -    if x is not None and x not in darray.dims and x not in darray.coords:
    -        raise ValueError("x " + error_msg)
    +    if x is not None and y is not None:
    +        raise ValueError("Cannot specify both x and y kwargs for line plots.")
     
    -    if y is not None and y not in darray.dims and y not in darray.coords:
    -        raise ValueError("y " + error_msg)
    +    if x is not None:
    +        _assert_valid_xy(darray, x, "x")
     
    -    if x is not None and y is not None:
    -        raise ValueError("You cannot specify both x and y kwargs" "for line plots.")
    +    if y is not None:
    +        _assert_valid_xy(darray, y, "y")
     
         if ndims == 1:
             huename = None
    @@ -252,7 +251,7 @@ def line(
             Dimension or coordinate for which you want multiple lines plotted.
             If plotting against a 2D coordinate, ``hue`` must be a dimension.
         x, y : string, optional
    -        Dimensions or coordinates for x, y axis.
    +        Dimension, coordinate or MultiIndex level for x, y axis.
             Only one of these may be specified.
             The other coordinate plots values from the DataArray on which this
             plot method is called.
    diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
    index cb993c192d9..e5c1fa89333 100644
    --- a/xarray/plot/utils.py
    +++ b/xarray/plot/utils.py
    @@ -360,7 +360,9 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
     
         darray must be a 2 dimensional data array, or 3d for imshow only.
         """
    -    assert x is None or x != y
    +    if (x is not None) and (x == y):
    +        raise ValueError("x and y cannot be equal.")
    +
         if imshow and darray.ndim == 3:
             return _infer_xy_labels_3d(darray, x, y, rgb)
     
    @@ -369,18 +371,41 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
                 raise ValueError("DataArray must be 2d")
             y, x = darray.dims
         elif x is None:
    -        if y not in darray.dims and y not in darray.coords:
    -            raise ValueError("y must be a dimension name if x is not supplied")
    +        _assert_valid_xy(darray, y, "y")
             x = darray.dims[0] if y == darray.dims[1] else darray.dims[1]
         elif y is None:
    -        if x not in darray.dims and x not in darray.coords:
    -            raise ValueError("x must be a dimension name if y is not supplied")
    +        _assert_valid_xy(darray, x, "x")
             y = darray.dims[0] if x == darray.dims[1] else darray.dims[1]
    -    elif any(k not in darray.coords and k not in darray.dims for k in (x, y)):
    -        raise ValueError("x and y must be coordinate variables")
    +    else:
    +        _assert_valid_xy(darray, x, "x")
    +        _assert_valid_xy(darray, y, "y")
    +
    +        if (
    +            all(k in darray._level_coords for k in (x, y))
    +            and darray._level_coords[x] == darray._level_coords[y]
    +        ):
    +            raise ValueError("x and y cannot be levels of the same MultiIndex")
    +
         return x, y
     
     
    +def _assert_valid_xy(darray, xy, name):
    +    """
    +    make sure x and y passed to plotting functions are valid
    +    """
    +
    +    # MultiIndex cannot be plotted; no point in allowing them here
    +    multiindex = set([darray._level_coords[lc] for lc in darray._level_coords])
    +
    +    valid_xy = (
    +        set(darray.dims) | set(darray.coords) | set(darray._level_coords)
    +    ) - multiindex
    +
    +    if xy not in valid_xy:
    +        valid_xy_str = "', '".join(sorted(valid_xy))
    +        raise ValueError(f"{name} must be one of None, '{valid_xy_str}'")
    +
    +
     def get_axis(figsize, size, aspect, ax):
         import matplotlib as mpl
         import matplotlib.pyplot as plt
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index af7c686bf60..6497987e813 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -136,7 +136,7 @@ def test_label_from_attrs(self):
         def test1d(self):
             self.darray[:, 0, 0].plot()
     
    -        with raises_regex(ValueError, "None"):
    +        with raises_regex(ValueError, "x must be one of None, 'dim_0'"):
                 self.darray[:, 0, 0].plot(x="dim_1")
     
             with raises_regex(TypeError, "complex128"):
    @@ -155,14 +155,31 @@ def test_1d_x_y_kw(self):
             for aa, (x, y) in enumerate(xy):
                 da.plot(x=x, y=y, ax=ax.flat[aa])
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot specify both"):
                 da.plot(x="z", y="z")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="f", y="z")
    +        error_msg = "must be one of None, 'z'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
    +            da.plot(x="f")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="z", y="f")
    +        with raises_regex(ValueError, f"y {error_msg}"):
    +            da.plot(y="f")
    +
    +    def test_multiindex_level_as_coord(self):
    +        da = xr.DataArray(
    +            np.arange(5),
    +            dims="x",
    +            coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
    +        )
    +        da = da.set_index(x=["a", "b"])
    +
    +        for x in ["a", "b"]:
    +            h = da.plot(x=x)[0]
    +            assert_array_equal(h.get_xdata(), da[x].values)
    +
    +        for y in ["a", "b"]:
    +            h = da.plot(y=y)[0]
    +            assert_array_equal(h.get_ydata(), da[y].values)
     
         # Test for bug in GH issue #2725
         def test_infer_line_data(self):
    @@ -211,7 +228,7 @@ def test_2d_line(self):
             self.darray[:, :, 0].plot.line(x="dim_0", hue="dim_1")
             self.darray[:, :, 0].plot.line(y="dim_0", hue="dim_1")
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot"):
                 self.darray[:, :, 0].plot.line(x="dim_1", y="dim_0", hue="dim_1")
     
         def test_2d_line_accepts_legend_kw(self):
    @@ -1032,6 +1049,16 @@ def test_nonnumeric_index_raises_typeerror(self):
             with raises_regex(TypeError, r"[Pp]lot"):
                 self.plotfunc(a)
     
    +    def test_multiindex_raises_typeerror(self):
    +        a = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        a = a.set_index(y=("a", "b"))
    +        with raises_regex(TypeError, r"[Pp]lot"):
    +            self.plotfunc(a)
    +
         def test_can_pass_in_axis(self):
             self.pass_in_axis(self.plotmethod)
     
    @@ -1140,15 +1167,16 @@ def test_positional_coord_string(self):
             assert "y_long_name [y_units]" == ax.get_ylabel()
     
         def test_bad_x_string_exception(self):
    -        with raises_regex(ValueError, "x and y must be coordinate variables"):
    +
    +        with raises_regex(ValueError, "x and y cannot be equal."):
    +            self.plotmethod(x="y", y="y")
    +
    +        error_msg = "must be one of None, 'x', 'x2d', 'y', 'y2d'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod("not_a_real_dim", "y")
    -        with raises_regex(
    -            ValueError, "x must be a dimension name if y is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod(x="not_a_real_dim")
    -        with raises_regex(
    -            ValueError, "y must be a dimension name if x is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"y {error_msg}"):
                 self.plotmethod(y="not_a_real_dim")
             self.darray.coords["z"] = 100
     
    @@ -1183,6 +1211,27 @@ def test_non_linked_coords_transpose(self):
             # simply ensure that these high coords were passed over
             assert np.min(ax.get_xlim()) > 100.0
     
    +    def test_multiindex_level_as_coord(self):
    +        da = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        da = da.set_index(y=["a", "b"])
    +
    +        for x, y in (("a", "x"), ("b", "x"), ("x", "a"), ("x", "b")):
    +            self.plotfunc(da, x=x, y=y)
    +
    +            ax = plt.gca()
    +            assert x == ax.get_xlabel()
    +            assert y == ax.get_ylabel()
    +
    +        with raises_regex(ValueError, "levels of the same MultiIndex"):
    +            self.plotfunc(da, x="a", y="b")
    +
    +        with raises_regex(ValueError, "y must be one of None, 'a', 'b', 'x'"):
    +            self.plotfunc(da, x="a", y="y")
    +
         def test_default_title(self):
             a = DataArray(easy_array((4, 3, 2)), dims=["a", "b", "c"])
             a.coords["c"] = [0, 1]
    
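A minimal sketch of what this plotting change enables, mirroring the new tests above (the
array and level names are illustrative only): a MultiIndex level can now be passed as the
``x`` or ``y`` argument of the plot methods.

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(5),
        dims="x",
        coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
    )
    da = da.set_index(x=["a", "b"])  # "a" and "b" become levels of the "x" MultiIndex

    da.plot(x="a")  # plot against the level "a" instead of the dimension "x"
    da.plot(y="b")  # or place a level on the y axis
    # passing both x and y to a 1D line plot still raises ValueError
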
    From 3194b3ed1e414729ba6ab6f7f3ed39a425da42b1 Mon Sep 17 00:00:00 2001
    From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com>
    Date: Mon, 25 May 2020 17:55:33 +0100
    Subject: [PATCH 033/342] xr.cov() and xr.corr() (#4089)
    
    * Added chunks='auto' option in dataset.py
    
    * reverted accidental changes in dataset.chunk()
    
    * Added corr and cov to computation.py. Taken from r-beer:xarray/corr
    
    * Added r-beer's tests to test_computation.py
    
    Still issues I think
    
    * trying to fix github.com/pydata/xarray/pull/3550#discussion_r349935731
    
    * Removing drop=True from the `.where()` calls in `computation.py`+test.py
    
    * api.rst and whats-new.rst
    
    * Updated `xarray/__init__.py` and added `broadcast` import to computation
    
    * added DataArray import to corr, cov
    
    * assert_allclose added to test_computation.py
    
    * removed whitespace in test_dask...oops
    
    * Added to init
    
    * format changes
    
    * Fiddling around with cov/corr tests in `test_computation.py`
    
    * PEP8 changes
    
    * pep
    
    * remove old todo and comments
    
    * isort
    
* Added consistency check between corr() and cov(), ensuring they give the same result
    
    * added `skipna=False` to `computation.py`. made consistency+autocov tests
    
    * formatting
    
    * Added numpy-based tests.
    
    * format
    
    * formatting again
    
    * Update doc/whats-new.rst
    
    Co-authored-by: keewis 
    
    * refactored corr/cov so there is one internal method for calculating both
    
    * formatting
    
    * updating docstrings and code suggestions from PR
    
* parametrize ddof in tests
    
    * removed extraneous test arrays
    
    * formatting + adding deterministic docstring
    
    * added test for TypeError
    
    * formatting
    
    * tidying up docstring
    
    * formatting and tidying up `_cov_corr()` so that the logic is more clear
    
    * flake8 ...
    
    Co-authored-by: keewis 
    ---
     doc/api.rst                      |   2 +
     doc/whats-new.rst                |   2 +
     xarray/__init__.py               |   4 +-
     xarray/core/computation.py       | 180 ++++++++++++++++++++++++++++++-
     xarray/tests/test_computation.py | 158 ++++++++++++++++++++++++++-
     5 files changed, 343 insertions(+), 3 deletions(-)
    
    diff --git a/doc/api.rst b/doc/api.rst
    index 8ec6843d24a..c9f24e8c3f1 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -29,6 +29,8 @@ Top-level functions
        full_like
        zeros_like
        ones_like
    +   cov
    +   corr
        dot
        polyval
        map_blocks
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 0be988da690..d4a46c1e020 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,6 +36,8 @@ Breaking changes
     
     New Features
     ~~~~~~~~~~~~
    +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
    +  By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
       By `Pascal Bourgault `_.
     - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index 0fead57e5fb..e8274d13ffe 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -17,7 +17,7 @@
     from .core.alignment import align, broadcast
     from .core.combine import auto_combine, combine_by_coords, combine_nested
     from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
    -from .core.computation import apply_ufunc, dot, polyval, where
    +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where
     from .core.concat import concat
     from .core.dataarray import DataArray
     from .core.dataset import Dataset
    @@ -54,6 +54,8 @@
         "concat",
         "decode_cf",
         "dot",
    +    "cov",
    +    "corr",
         "full_like",
         "load_dataarray",
         "load_dataset",
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 28bf818e4a3..6ac4f74c3a6 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -24,7 +24,7 @@
     import numpy as np
     
     from . import dtypes, duck_array_ops, utils
    -from .alignment import deep_align
    +from .alignment import align, deep_align
     from .merge import merge_coordinates_without_align
     from .options import OPTIONS
     from .pycompat import dask_array_type
    @@ -1069,6 +1069,184 @@ def earth_mover_distance(first_samples,
             return apply_array_ufunc(func, *args, dask=dask)
     
     
    +def cov(da_a, da_b, dim=None, ddof=1):
    +    """
    +    Compute covariance between two DataArray objects along a shared dimension.
    +
    +    Parameters
    +    ----------
    +    da_a: DataArray object
    +        Array to compute.
    +    da_b: DataArray object
    +        Array to compute.
    +    dim : str, optional
    +        The dimension along which the covariance will be computed
    +    ddof: int, optional
    +        If ddof=1, covariance is normalized by N-1, giving an unbiased estimate,
    +        else normalization is by N.
    +
    +    Returns
    +    -------
    +    covariance: DataArray
    +
    +    See also
    +    --------
    +    pandas.Series.cov: corresponding pandas function
    +    xr.corr: respective function to calculate correlation
    +
    +    Examples
    +    --------
    +    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a
+    <xarray.DataArray (space: 3, time: 3)>
    +    array([[1. , 2. , 3. ],
    +           [0.1, 0.2, 0.3],
    +           [3.2, 0.6, 1.8]])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
+    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b
+    <xarray.DataArray (space: 3, time: 3)>
    +    array([[ 0.2,  0.4,  0.6],
    +           [15. , 10. ,  5. ],
    +           [ 3.2,  0.6,  1.8]])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
+    >>> xr.cov(da_a, da_b)
+    <xarray.DataArray ()>
    +    array(-3.53055556)
    +    >>> xr.cov(da_a, da_b, dim='time')
+    <xarray.DataArray (space: 3)>
    +    array([ 0.2, -0.5,  1.69333333])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
+    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a
+    <xarray.DataArray (space: 3, time: 3)>
    +    array([[1. , 2. , 3. ],
    +           [0.1, 0.2, 0.3],
    +           [3.2, 0.6, 1.8]])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
+    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b
+    <xarray.DataArray (space: 3, time: 3)>
    +    array([[ 0.2,  0.4,  0.6],
    +           [15. , 10. ,  5. ],
    +           [ 3.2,  0.6,  1.8]])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
+    >>> xr.corr(da_a, da_b)
+    <xarray.DataArray ()>
    +    array(-0.57087777)
    +    >>> xr.corr(da_a, da_b, dim='time')
+    <xarray.DataArray (space: 3)>
    +    array([ 1., -1.,  1.])
    +    Coordinates:
+      * space    (space) <U2 'IA' 'IL' 'IN'
+      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
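For orientation, a short usage sketch of the two new top-level functions added in this
patch (the array values are illustrative; the full docstring examples are in the diff
above):

    import numpy as np
    import xarray as xr

    da_a = xr.DataArray(
        np.array([[1.0, 2.0, 3.0], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
        dims=("space", "time"),
    )
    da_b = xr.DataArray(
        np.array([[0.2, 0.4, 0.6], [15.0, 10.0, 5.0], [3.2, 0.6, 1.8]]),
        dims=("space", "time"),
    )

    xr.cov(da_a, da_b)               # covariance over all shared dims, ddof=1 by default
    xr.cov(da_a, da_b, ddof=0)       # normalize by N instead of N-1
    xr.corr(da_a, da_b, dim="time")  # Pearson correlation along "time", one value per "space"
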
    Date: Mon, 25 May 2020 20:23:44 +0100
    Subject: [PATCH 034/342] Auto chunk (#4064)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    * Added chunks='auto' option in dataset.py
    
    * FIX: correct dask array handling in _calc_idxminmax (#3922)
    
    * FIX: correct dask array handling in _calc_idxminmax
    
    * FIX: remove unneeded import, reformat via black
    
    * fix idxmax, idxmin with dask arrays
    
    * FIX: use array[dim].data in `_calc_idxminmax` as per @keewis suggestion, attach dim name to result
    
    * ADD: add dask tests to `idxmin`/`idxmax` dataarray tests
    
    * FIX: add back fixture line removed by accident
    
* ADD: complete dask handling in `idxmin`/`idxmax` tests in test_dataarray, xfail dask tests for dtype datetime64 (M)
    
    * ADD: add "support dask handling for idxmin/idxmax" in whats-new.rst
    
    * MIN: reintroduce changes added by #3953
    
    * MIN: change if-clause to use `and` instead of `&` as per review-comment
    
    * MIN: change if-clause to use `and` instead of `&` as per review-comment
    
    * WIP: remove dask handling entirely for debugging purposes
    
    * Test for dask computes
    
    * WIP: re-add dask handling (map_blocks-approach), add `with raise_if_dask_computes()` context to idxmin-tests
    
    * Use dask indexing instead of map_blocks.
    
    * Better chunk choice.
    
    * Return -1 for _nan_argminmax_object if all NaNs along dim
    
    * Revert "Return -1 for _nan_argminmax_object if all NaNs along dim"
    
    This reverts commit 58901b9da821a04f2ec085577cb916c4d67f6f50.
    
    * Raise error for object arrays
    
    * No error for object arrays. Instead expect 1 compute in tests.
    
    Co-authored-by: dcherian 
    
    * fix the failing flake8 CI (#4057)
    
    * rename d and l to dim and length
    
    * Fixed typo in rasterio docs (#4063)
    
    * Added chunks='auto' option in dataset.py
    
    Added changes to whats-new.rst
    
    * Added chunks='auto' option in dataset.py
    
    Added changes to whats-new.rst
    
    * Error fix, catch chunks=None
    
    * Minor reformatting + flake8 changes
    
    * Added isinstance(chunks, (Number, str)) in dataset.py, passing
    
    * format changes
    
    * added auto-chunk test for dataarrays
    
    * Assert chunk sizes equal in auto-chunk test
    
    Co-authored-by: Kai Mühlbauer 
    Co-authored-by: dcherian 
    Co-authored-by: keewis 
    Co-authored-by: clausmichele <31700619+clausmichele@users.noreply.github.com>
    Co-authored-by: Keewis 
    ---
     doc/whats-new.rst         | 4 ++++
     xarray/core/dataset.py    | 9 ++++++---
     xarray/tests/test_dask.py | 8 ++++++++
     3 files changed, 18 insertions(+), 3 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d4a46c1e020..e1012283c94 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,6 +36,10 @@ Breaking changes
     
     New Features
     ~~~~~~~~~~~~
    +
    +- ``chunks='auto'`` is now supported in the ``chunks`` argument of
    +  :py:meth:`Dataset.chunk`. (:issue:`4055`)
    +  By `Andrew Williams `_ 
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index d93f4044a6d..43f6ad9c90e 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -1707,7 +1707,10 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         def chunk(
             self,
             chunks: Union[
    -            None, Number, Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]]
    +            None,
    +            Number,
    +            str,
    +            Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]],
             ] = None,
             name_prefix: str = "xarray-",
             token: str = None,
    @@ -1725,7 +1728,7 @@ def chunk(
     
             Parameters
             ----------
    -        chunks : int or mapping, optional
    +        chunks : int, 'auto' or mapping, optional
                 Chunk sizes along each dimension, e.g., ``5`` or
                 ``{'x': 5, 'y': 5}``.
             name_prefix : str, optional
    @@ -1742,7 +1745,7 @@ def chunk(
             """
             from dask.base import tokenize
     
    -        if isinstance(chunks, Number):
    +        if isinstance(chunks, (Number, str)):
                 chunks = dict.fromkeys(self.dims, chunks)
     
             if chunks is not None:
    diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
    index 75beb3757ca..6f714fe1825 100644
    --- a/xarray/tests/test_dask.py
    +++ b/xarray/tests/test_dask.py
    @@ -1035,6 +1035,14 @@ def test_unify_chunks_shallow_copy(obj, transform):
         assert_identical(obj, unified) and obj is not obj.unify_chunks()
     
     
    +@pytest.mark.parametrize("obj", [make_da()])
    +def test_auto_chunk_da(obj):
    +    actual = obj.chunk("auto").data
    +    expected = obj.data.rechunk("auto")
    +    np.testing.assert_array_equal(actual, expected)
    +    assert actual.chunks == expected.chunks
    +
    +
     def test_map_blocks_error(map_da, map_ds):
         def bad_func(darray):
             return (darray * darray.x + 5 * darray.y)[:1, :1]
    
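A brief sketch of the new ``chunks="auto"`` behaviour (requires dask; the array shape is
made up):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"foo": (("x", "y"), np.zeros((1000, 1000)))})

    # "auto" is applied to every dimension and forwarded to dask,
    # which picks chunk sizes much like da.rechunk("auto") would
    chunked = ds.chunk("auto")
    print(chunked.chunks)
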
    From d1f7cb8fd95d588d3f7a7e90916c25747b90ad5a Mon Sep 17 00:00:00 2001
    From: Keisuke Fujii 
    Date: Tue, 26 May 2020 05:02:36 +0900
    Subject: [PATCH 035/342] Improve interp performance (#4069)
    
    * Fixes 2223
    
    * more tests
    
    * add @requires_scipy to test
    
    * fix tests
    
    * black
    
    * update whatsnew. Added a test for nearest
    ---
     doc/whats-new.rst           |  7 +++++++
     xarray/core/missing.py      | 15 ++++++++++++++-
     xarray/testing.py           |  7 +------
     xarray/tests/test_interp.py | 18 ++++++++++++++++++
     4 files changed, 40 insertions(+), 7 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e1012283c94..59c7faa8973 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -34,6 +34,13 @@ Breaking changes
       (:pull:`3274`)
       By `Elliott Sales de Andrade `_
     
    +Enhancements
    +~~~~~~~~~~~~
+- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp`.
    +  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially 
    +  rather than interpolating in multidimensional space. (:issue:`2223`)
    +  By `Keisuke Fujii `_.
    +
     New Features
     ~~~~~~~~~~~~
     
    diff --git a/xarray/core/missing.py b/xarray/core/missing.py
    index f973b4a5468..374eaec1fa7 100644
    --- a/xarray/core/missing.py
    +++ b/xarray/core/missing.py
    @@ -619,6 +619,19 @@ def interp(var, indexes_coords, method, **kwargs):
         # default behavior
         kwargs["bounds_error"] = kwargs.get("bounds_error", False)
     
    +    # check if the interpolation can be done in orthogonal manner
    +    if (
    +        len(indexes_coords) > 1
    +        and method in ["linear", "nearest"]
    +        and all(dest[1].ndim == 1 for dest in indexes_coords.values())
    +        and len(set([d[1].dims[0] for d in indexes_coords.values()]))
    +        == len(indexes_coords)
    +    ):
    +        # interpolate sequentially
    +        for dim, dest in indexes_coords.items():
    +            var = interp(var, {dim: dest}, method, **kwargs)
    +        return var
    +
         # target dimensions
         dims = list(indexes_coords)
         x, new_x = zip(*[indexes_coords[d] for d in dims])
    @@ -659,7 +672,7 @@ def interp_func(var, x, new_x, method, kwargs):
             New coordinates. Should not contain NaN.
         method: string
             {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for
    -        1-dimensional itnterpolation.
    +        1-dimensional interpolation.
             {'linear', 'nearest'} for multidimensional interpolation
         **kwargs:
             Optional keyword arguments to be passed to scipy.interpolator
    diff --git a/xarray/testing.py b/xarray/testing.py
    index ac189f7e023..e7bf5f9221a 100644
    --- a/xarray/testing.py
    +++ b/xarray/testing.py
    @@ -10,12 +10,7 @@
     from xarray.core.indexes import default_indexes
     from xarray.core.variable import IndexVariable, Variable
     
    -__all__ = (
    -    "assert_allclose",
    -    "assert_chunks_equal",
    -    "assert_equal",
    -    "assert_identical",
    -)
    +__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical")
     
     
     def _decode_string_data(data):
    diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py
    index 0502348160e..7a0dda216e2 100644
    --- a/xarray/tests/test_interp.py
    +++ b/xarray/tests/test_interp.py
    @@ -699,3 +699,21 @@ def test_3641():
         times = xr.cftime_range("0001", periods=3, freq="500Y")
         da = xr.DataArray(range(3), dims=["time"], coords=[times])
         da.interp(time=["0002-05-01"])
    +
    +
    +@requires_scipy
    +@pytest.mark.parametrize("method", ["nearest", "linear"])
    +def test_decompose(method):
    +    da = xr.DataArray(
    +        np.arange(6).reshape(3, 2),
    +        dims=["x", "y"],
    +        coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
    +    )
    +    x_new = xr.DataArray([0.5, 1.5, 2.5], dims=["x1"])
    +    y_new = xr.DataArray([-0.15, -0.25], dims=["y1"])
    +    x_broadcast, y_broadcast = xr.broadcast(x_new, y_new)
    +    assert x_broadcast.ndim == 2
    +
    +    actual = da.interp(x=x_new, y=y_new, method=method).drop(("x", "y"))
    +    expected = da.interp(x=x_broadcast, y=y_broadcast, method=method).drop(("x", "y"))
    +    assert_allclose(actual, expected)
    
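Seen from the user side, the optimization kicks in when every destination coordinate is
1D and lives on its own dimension; a sketch (requires scipy, values are illustrative):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(6).reshape(3, 2),
        dims=["x", "y"],
        coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
    )
    x_new = xr.DataArray([0.5, 1.5, 2.5], dims=["x1"])
    y_new = xr.DataArray([-0.15, -0.25], dims=["y1"])

    # orthogonal 1D indexers: interpolated one dimension at a time under the hood,
    # matching the result of the previous multidimensional code path
    out = da.interp(x=x_new, y=y_new, method="linear")
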
    From 864877c313d026ea5664570741a328324064f77c Mon Sep 17 00:00:00 2001
    From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com>
    Date: Tue, 26 May 2020 20:03:24 +0100
    Subject: [PATCH 036/342] Corrcov typo fix (#4096)
    
    * fixing typo in corr, cov docstrings
    
    * reverted accidental changes in dataset.chunk()
    
    * pep8
    ---
     xarray/core/computation.py | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 6ac4f74c3a6..5e172ea29ab 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1108,7 +1108,7 @@ def cov(da_a, da_b, dim=None, ddof=1):
         Coordinates:
           * space    (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    >>> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
         ...                  dims=("space", "time"),
         ...                  coords=[('space', ['IA', 'IL', 'IN']),
         ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    @@ -1177,7 +1177,7 @@ def corr(da_a, da_b, dim=None):
         Coordinates:
           * space    (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    >>> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
         ...                  dims=("space", "time"),
         ...                  coords=[('space', ['IA', 'IL', 'IN']),
         ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    
    From e5cc19cd8f8a69e0743f230f5bf51b7778a0ff96 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Tue, 26 May 2020 22:20:01 +0000
    Subject: [PATCH 037/342] Fix conversion of multiindexed pandas objects to
     sparse xarray objects (#4088)
    
    * Fix conversion of multiindexed pandas objects to sparse xarray objects
    
    * lint
    
    * fix whats-new
    
    * fix test
    
    * minor whats-new
    ---
     doc/whats-new.rst              |  2 ++
     xarray/core/dataset.py         |  2 +-
     xarray/tests/test_dataarray.py | 18 ++++++++++++++++++
     3 files changed, 21 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 59c7faa8973..a32e0393bcf 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -102,6 +102,8 @@ Bug fixes
     ~~~~~~~~~
     - Support dark mode in VS code (:issue:`4024`)
       By `Keisuke Fujii `_.
    +- Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`)
    +  By `Deepak Cherian `_.
     - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`)
       By `Huite Bootsma `_.
     - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 43f6ad9c90e..2d0044711fe 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -4537,7 +4537,7 @@ def _set_sparse_data_from_dataframe(
             idx = dataframe.index
             if isinstance(idx, pd.MultiIndex):
                 coords = np.stack([np.asarray(code) for code in idx.codes], axis=0)
    -            is_sorted = idx.is_lexsorted
    +            is_sorted = idx.is_lexsorted()
                 shape = tuple(lev.size for lev in idx.levels)
             else:
                 coords = np.arange(idx.size).reshape(1, -1)
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index a01234616a4..54a77261fb4 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -3532,6 +3532,24 @@ def test_from_series_sparse(self):
             actual_sparse.data = actual_sparse.data.todense()
             assert_identical(actual_sparse, actual_dense)
     
    +    @requires_sparse
    +    def test_from_multiindex_series_sparse(self):
    +        # regression test for GH4019
    +        import sparse
    +
    +        idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
    +        series = pd.Series(np.random.RandomState(0).random(len(idx)), index=idx).sample(
    +            n=5, random_state=3
    +        )
    +
    +        dense = DataArray.from_series(series, sparse=False)
    +        expected_coords = sparse.COO.from_numpy(dense.data, np.nan).coords
    +
    +        actual_sparse = xr.DataArray.from_series(series, sparse=True)
    +        actual_coords = actual_sparse.data.coords
    +
    +        np.testing.assert_equal(actual_coords, expected_coords)
    +
         def test_to_and_from_empty_series(self):
             # GH697
             expected = pd.Series([], dtype=np.float64)
    
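A hedged illustration of the code path this fixes (requires the sparse package; the
series is illustrative):

    import numpy as np
    import pandas as pd
    import xarray as xr

    idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
    # a subsampled, hence non-lexsorted, MultiIndex: the case this fix targets
    series = pd.Series(np.arange(len(idx), dtype=float), index=idx).sample(
        n=5, random_state=0
    )

    sparse_da = xr.DataArray.from_series(series, sparse=True)
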
    From a2e9804d372a2e8a0d83904544b1d8421bfa2b44 Mon Sep 17 00:00:00 2001
    From: Akio Taniguchi 
    Date: Sat, 30 May 2020 01:50:58 +0900
    Subject: [PATCH 038/342] Add xarray-custom to related projects (#4109)
    
    ---
     doc/related-projects.rst | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/doc/related-projects.rst b/doc/related-projects.rst
    index b02c4be7338..9891f1a6bc2 100644
    --- a/doc/related-projects.rst
    +++ b/doc/related-projects.rst
    @@ -62,6 +62,7 @@ Extend xarray capabilities
     - `eofs `_: EOF analysis in Python.
     - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input.
     - `nxarray `_: NeXus input/output capability for xarray.
    +- `xarray-custom `_: Data classes for custom xarray creation.
     - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations).
     - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API.
     - `xrft `_: Fourier transforms for xarray data.
    
    From 73b013f22fe050ecd758b9c13fb5a06c4a8ba22e Mon Sep 17 00:00:00 2001
    From: Phillip Butcher 
    Date: Sat, 30 May 2020 06:15:55 -0700
    Subject: [PATCH 039/342] Assign default group name in groupby if name=None
     (#158) (#4098)
    
    * Assign default group name in groupby (#158)
    
    * When groupby receives a DataArray with name=None assign name='group'
    
    * Previously when name=None a ValueError: `group` must have a name was raised
    
    * Closes #158
    
    * Add test
    
    * Update whats-new.rst
    
    * black
    
    * Add assert statement to test group name was added to DataArray
    
    Co-authored-by: phillipbutcher 
    ---
     doc/whats-new.rst            |  2 ++
     xarray/core/groupby.py       |  2 +-
     xarray/tests/test_groupby.py | 12 ++++++++++++
     3 files changed, 15 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a32e0393bcf..e06ed5be897 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -100,6 +100,8 @@ New Features
     
     Bug fixes
     ~~~~~~~~~
    +- If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`)
    +  By `Phil Butcher `_.
     - Support dark mode in VS code (:issue:`4024`)
       By `Keisuke Fujii `_.
     - Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`)
    diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
    index 299cb8ec4fa..04c0fabae6a 100644
    --- a/xarray/core/groupby.py
    +++ b/xarray/core/groupby.py
    @@ -321,7 +321,7 @@ def __init__(
                     group = _DummyGroup(obj, group.name, group.coords)
     
             if getattr(group, "name", None) is None:
    -            raise ValueError("`group` must have a name")
    +            group.name = "group"
     
             group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj)
             (group_dim,) = group.dims
    diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
    index 866d5fb0899..aa54c8f36f1 100644
    --- a/xarray/tests/test_groupby.py
    +++ b/xarray/tests/test_groupby.py
    @@ -538,4 +538,16 @@ def test_groupby_bins_timeseries():
         assert_identical(actual, expected)
     
     
    +def test_groupby_none_group_name():
    +    # GH158
    +    # xarray should not fail if a DataArray's name attribute is None
    +
    +    data = np.arange(10) + 10
    +    da = xr.DataArray(data)  # da.name = None
    +    key = xr.DataArray(np.floor_divide(data, 2))
    +
    +    mean = da.groupby(key).mean()
    +    assert "group" in mean.dims
    +
    +
     # TODO: move other groupby tests from test_dataset and test_dataarray over here
    
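In practice the change means grouping by an unnamed DataArray no longer errors; a minimal
sketch:

    import numpy as np
    import xarray as xr

    data = np.arange(10) + 10
    da = xr.DataArray(data)        # da.name is None
    key = xr.DataArray(data // 2)  # the grouper is also unnamed

    # previously: ValueError: `group` must have a name
    # now the grouper gets the default name "group", which becomes the new dimension
    mean = da.groupby(key).mean()
    assert "group" in mean.dims
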
    From fd9e620a84389170138cc014ee5a0213718beb78 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Sat, 30 May 2020 14:08:26 -0400
    Subject: [PATCH 040/342] xr.infer_freq (#4033)
    
    * xr.infer_freq and related code
    
    * Formatting and comments
    
    * Rewrite _CFTimeFrequencyInferer independently of pandas
    
    * Syntax and add frequency.py file
    
    * Fix tests and month_deltas
    
    * Require cftime 1.1.0 for the test
    
    * Apply suggestions from code review
    
    Co-authored-by: Spencer Clark 
    
    * Changes following code review
    
    * Docs
    
    * Docs
    
    * Black
    
    * Fix tests for requiring cftime 1.1.0
    
    * Update whats-new
    
    * Apply suggestions from code review
    
    Co-authored-by: Spencer Clark 
    Co-authored-by: Mathias Hauser 
    
    * Add invalid input tests for better coverage
    
    * Fix link in whats-new.rst
    
    Co-authored-by: Spencer Clark 
    Co-authored-by: Mathias Hauser 
    ---
     doc/api.rst                      |   1 +
     doc/weather-climate.rst          |   9 +
     doc/whats-new.rst                |   3 +-
     xarray/__init__.py               |   2 +
     xarray/coding/cftimeindex.py     |   3 +-
     xarray/coding/frequencies.py     | 272 +++++++++++++++++++++++++++++++
     xarray/tests/test_cftimeindex.py |  70 ++++++++
     7 files changed, 358 insertions(+), 2 deletions(-)
     create mode 100644 xarray/coding/frequencies.py
    
    diff --git a/doc/api.rst b/doc/api.rst
    index c9f24e8c3f1..3f25ac1a070 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -26,6 +26,7 @@ Top-level functions
        combine_nested
        where
        set_options
    +   infer_freq
        full_like
        zeros_like
        ones_like
    diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst
    index 1eb63d24630..f03dfd14c73 100644
    --- a/doc/weather-climate.rst
    +++ b/doc/weather-climate.rst
    @@ -74,6 +74,15 @@ instance, we can create the same dates and DataArray we created above using:
         dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap")
         da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo")
     
    +Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to
    +infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D
    +:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with
    +``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data.
    +
    +.. ipython:: python
    +
    +    xr.infer_freq(dates)
    +
     With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from
     the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the
     :py:meth:`~xarray.DataArray.dt` accessor for a :py:class:`~xarray.DataArray`
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e06ed5be897..e8e30917cff 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -43,7 +43,8 @@ Enhancements
     
     New Features
     ~~~~~~~~~~~~
    -
    +- Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
    +  By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
       :py:meth:`Dataset.chunk`. (:issue:`4055`)
       By `Andrew Williams `_ 
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index e8274d13ffe..cb4824d188d 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -13,6 +13,7 @@
     from .backends.zarr import open_zarr
     from .coding.cftime_offsets import cftime_range
     from .coding.cftimeindex import CFTimeIndex
    +from .coding.frequencies import infer_freq
     from .conventions import SerializationWarning, decode_cf
     from .core.alignment import align, broadcast
     from .core.combine import auto_combine, combine_by_coords, combine_nested
    @@ -57,6 +58,7 @@
         "cov",
         "corr",
         "full_like",
    +    "infer_freq",
         "load_dataarray",
         "load_dataset",
         "map_blocks",
    diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
    index 6fc28d213dd..2a7eaa99edb 100644
    --- a/xarray/coding/cftimeindex.py
    +++ b/xarray/coding/cftimeindex.py
    @@ -578,7 +578,8 @@ def asi8(self):
                 [
                     _total_microseconds(exact_cftime_datetime_difference(epoch, date))
                     for date in self.values
    -            ]
    +            ],
    +            dtype=np.int64,
             )
     
         def _round_via_method(self, freq, method):
    diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py
    new file mode 100644
    index 00000000000..86f84ba5fbd
    --- /dev/null
    +++ b/xarray/coding/frequencies.py
    @@ -0,0 +1,272 @@
    +"""FrequencyInferer analog for cftime.datetime objects"""
    +# The infer_freq method and the _CFTimeFrequencyInferer
    +# subclass defined here were copied and adapted for
    +# use with cftime.datetime objects based on the source code in
    +# pandas.tseries.Frequencies._FrequencyInferer
    +
    +# For reference, here is a copy of the pandas copyright notice:
    +
    +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
    +# All rights reserved.
    +
    +# Copyright (c) 2008-2011 AQR Capital Management, LLC
    +# All rights reserved.
    +
    +# Redistribution and use in source and binary forms, with or without
    +# modification, are permitted provided that the following conditions are
    +# met:
    +
    +#     * Redistributions of source code must retain the above copyright
    +#        notice, this list of conditions and the following disclaimer.
    +
    +#     * Redistributions in binary form must reproduce the above
    +#        copyright notice, this list of conditions and the following
    +#        disclaimer in the documentation and/or other materials provided
    +#        with the distribution.
    +
    +#     * Neither the name of the copyright holder nor the names of any
    +#        contributors may be used to endorse or promote products derived
    +#        from this software without specific prior written permission.
    +
    +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
    +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    +
    +import numpy as np
    +import pandas as pd
    +
    +from ..core.common import _contains_datetime_like_objects
    +from .cftime_offsets import _MONTH_ABBREVIATIONS
    +from .cftimeindex import CFTimeIndex
    +
    +_ONE_MICRO = 1
    +_ONE_MILLI = _ONE_MICRO * 1000
    +_ONE_SECOND = _ONE_MILLI * 1000
    +_ONE_MINUTE = 60 * _ONE_SECOND
    +_ONE_HOUR = 60 * _ONE_MINUTE
    +_ONE_DAY = 24 * _ONE_HOUR
    +
    +
    +def infer_freq(index):
    +    """
    +    Infer the most likely frequency given the input index.
    +
    +    Parameters
    +    ----------
    +    index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series
    +      If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`.
    +      If passed a Series or a DataArray will use the values of the series (NOT THE INDEX).
    +
    +    Returns
    +    -------
    +    str or None
    +        None if no discernible frequency.
    +
    +    Raises
    +    ------
    +    TypeError
    +        If the index is not datetime-like.
    +    ValueError
    +        If there are fewer than three values or the index is not 1D.
    +    """
    +    from xarray.core.dataarray import DataArray
    +
    +    if isinstance(index, (DataArray, pd.Series)):
    +        if index.ndim != 1:
    +            raise ValueError("'index' must be 1D")
    +        elif not _contains_datetime_like_objects(DataArray(index)):
    +            raise ValueError("'index' must contain datetime-like objects")
    +        dtype = np.asarray(index).dtype
    +        if dtype == "datetime64[ns]":
    +            index = pd.DatetimeIndex(index.values)
    +        elif dtype == "timedelta64[ns]":
    +            index = pd.TimedeltaIndex(index.values)
    +        else:
    +            index = CFTimeIndex(index.values)
    +
    +    if isinstance(index, CFTimeIndex):
    +        inferer = _CFTimeFrequencyInferer(index)
    +        return inferer.get_freq()
    +
    +    return pd.infer_freq(index)
    +
    +
    +class _CFTimeFrequencyInferer:  # (pd.tseries.frequencies._FrequencyInferer):
    +    def __init__(self, index):
    +        self.index = index
    +        self.values = index.asi8
    +
    +        if len(index) < 3:
    +            raise ValueError("Need at least 3 dates to infer frequency")
    +
    +        self.is_monotonic = (
    +            self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing
    +        )
    +
    +        self._deltas = None
    +        self._year_deltas = None
    +        self._month_deltas = None
    +
    +    def get_freq(self):
    +        """Find the appropriate frequency string to describe the inferred frequency of self.index
    +
    +        Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes.
    +
    +        Returns
    +        -------
    +        str or None
    +        """
    +        if not self.is_monotonic or not self.index.is_unique:
    +            return None
    +
    +        delta = self.deltas[0]  # Smallest delta
    +        if _is_multiple(delta, _ONE_DAY):
    +            return self._infer_daily_rule()
    +        # There is no possible intraday frequency with a non-unique delta
    +        # Different from pandas: we don't need to manage DST and business offsets in cftime
    +        elif not len(self.deltas) == 1:
    +            return None
    +
    +        if _is_multiple(delta, _ONE_HOUR):
    +            return _maybe_add_count("H", delta / _ONE_HOUR)
    +        elif _is_multiple(delta, _ONE_MINUTE):
    +            return _maybe_add_count("T", delta / _ONE_MINUTE)
    +        elif _is_multiple(delta, _ONE_SECOND):
    +            return _maybe_add_count("S", delta / _ONE_SECOND)
    +        elif _is_multiple(delta, _ONE_MILLI):
    +            return _maybe_add_count("L", delta / _ONE_MILLI)
    +        else:
    +            return _maybe_add_count("U", delta / _ONE_MICRO)
    +
    +    def _infer_daily_rule(self):
    +        annual_rule = self._get_annual_rule()
    +        if annual_rule:
    +            nyears = self.year_deltas[0]
    +            month = _MONTH_ABBREVIATIONS[self.index[0].month]
    +            alias = f"{annual_rule}-{month}"
    +            return _maybe_add_count(alias, nyears)
    +
    +        quartely_rule = self._get_quartely_rule()
    +        if quartely_rule:
    +            nquarters = self.month_deltas[0] / 3
    +            mod_dict = {0: 12, 2: 11, 1: 10}
    +            month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]]
    +            alias = f"{quartely_rule}-{month}"
    +            return _maybe_add_count(alias, nquarters)
    +
    +        monthly_rule = self._get_monthly_rule()
    +        if monthly_rule:
    +            return _maybe_add_count(monthly_rule, self.month_deltas[0])
    +
    +        if len(self.deltas) == 1:
    +            # Daily as there is no "Weekly" offsets with CFTime
    +            days = self.deltas[0] / _ONE_DAY
    +            return _maybe_add_count("D", days)
    +
    +        # CFTime has no business freq and no "week of month" (WOM)
    +        return None
    +
    +    def _get_annual_rule(self):
    +        if len(self.year_deltas) > 1:
    +            return None
    +
    +        if len(np.unique(self.index.month)) > 1:
    +            return None
    +
    +        return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index))
    +
    +    def _get_quartely_rule(self):
    +        if len(self.month_deltas) > 1:
    +            return None
    +
    +        if not self.month_deltas[0] % 3 == 0:
    +            return None
    +
    +        return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index))
    +
    +    def _get_monthly_rule(self):
    +        if len(self.month_deltas) > 1:
    +            return None
    +
    +        return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index))
    +
    +    @property
    +    def deltas(self):
    +        """Sorted unique timedeltas as microseconds."""
    +        if self._deltas is None:
    +            self._deltas = _unique_deltas(self.values)
    +        return self._deltas
    +
    +    @property
    +    def year_deltas(self):
    +        """Sorted unique year deltas."""
    +        if self._year_deltas is None:
    +            self._year_deltas = _unique_deltas(self.index.year)
    +        return self._year_deltas
    +
    +    @property
    +    def month_deltas(self):
    +        """Sorted unique month deltas."""
    +        if self._month_deltas is None:
    +            self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month)
    +        return self._month_deltas
    +
    +
    +def _unique_deltas(arr):
    +    """Sorted unique deltas of numpy array"""
    +    return np.sort(np.unique(np.diff(arr)))
    +
    +
    +def _is_multiple(us, mult: int):
    +    """Whether us is a multiple of mult"""
    +    return us % mult == 0
    +
    +
    +def _maybe_add_count(base: str, count: float):
    +    """If count is greater than 1, add it to the base offset string"""
    +    if count != 1:
    +        assert count == int(count)
    +        count = int(count)
    +        return f"{count}{base}"
    +    else:
    +        return base
    +
    +
    +def month_anchor_check(dates):
    +    """Return the monthly offset string.
    +
    +    Return "cs" if all dates are the first days of the month,
    +    "ce" if all dates are the last day of the month,
    +    None otherwise.
    +
    +    Replicated pandas._libs.tslibs.resolution.month_position_check
    +    but without business offset handling.
    +    """
    +    calendar_end = True
    +    calendar_start = True
    +
    +    for date in dates:
    +        if calendar_start:
    +            calendar_start &= date.day == 1
    +
    +        if calendar_end:
    +            cal = date.day == date.daysinmonth
    +            if calendar_end:
    +                calendar_end &= cal
    +        elif not calendar_start:
    +            break
    +
    +    if calendar_end:
    +        return "ce"
    +    elif calendar_start:
    +        return "cs"
    +    else:
    +        return None
    diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
    index b30e32c92ad..745ae341370 100644
    --- a/xarray/tests/test_cftimeindex.py
    +++ b/xarray/tests/test_cftimeindex.py
    @@ -1046,3 +1046,73 @@ def test_asi8_distant_date():
         result = index.asi8
         expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456])
         np.testing.assert_array_equal(result, expected)
    +
    +
    +@requires_cftime_1_1_0
    +def test_infer_freq_valid_types():
    +    cf_indx = xr.cftime_range("2000-01-01", periods=3, freq="D")
    +    assert xr.infer_freq(cf_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(cf_indx)) == "D"
    +
    +    pd_indx = pd.date_range("2000-01-01", periods=3, freq="D")
    +    assert xr.infer_freq(pd_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(pd_indx)) == "D"
    +
    +    pd_td_indx = pd.timedelta_range(start="1D", periods=3, freq="D")
    +    assert xr.infer_freq(pd_td_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(pd_td_indx)) == "D"
    +
    +
    +@requires_cftime_1_1_0
    +def test_infer_freq_invalid_inputs():
    +    # Non-datetime DataArray
    +    with pytest.raises(ValueError, match="must contain datetime-like objects"):
    +        xr.infer_freq(xr.DataArray([0, 1, 2]))
    +
    +    indx = xr.cftime_range("1990-02-03", periods=4, freq="MS")
    +    # 2D DataArray
    +    with pytest.raises(ValueError, match="must be 1D"):
    +        xr.infer_freq(xr.DataArray([indx, indx]))
    +
    +    # CFTimeIndex too short
    +    with pytest.raises(ValueError, match="Need at least 3 dates to infer frequency"):
    +        xr.infer_freq(indx[:2])
    +
    +    # Non-monotonic input
    +    assert xr.infer_freq(indx[np.array([0, 2, 1, 3])]) is None
    +
    +    # Non-unique input
    +    assert xr.infer_freq(indx[np.array([0, 1, 1, 2])]) is None
    +
    +    # No unique frequency (here 1st step is MS, second is 2MS)
    +    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None
    +
    +    # Same, but for QS
    +    indx = xr.cftime_range("1990-02-03", periods=4, freq="QS")
    +    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None
    +
    +
    +@requires_cftime_1_1_0
    +@pytest.mark.parametrize(
    +    "freq",
    +    [
    +        "300AS-JAN",
    +        "A-DEC",
    +        "AS-JUL",
    +        "2AS-FEB",
    +        "Q-NOV",
    +        "3QS-DEC",
    +        "MS",
    +        "4M",
    +        "7D",
    +        "D",
    +        "30H",
    +        "5T",
    +        "40S",
    +    ],
    +)
    +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
    +def test_infer_freq(freq, calendar):
    +    indx = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar)
    +    out = xr.infer_freq(indx)
    +    assert out == freq
    
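A quick sketch of the new public function (the cftime part needs cftime >= 1.1.0; the
frequency strings follow the pandas offset aliases):

    import pandas as pd
    import xarray as xr

    # works on CFTimeIndex objects and DataArrays wrapping them ...
    cf_index = xr.cftime_range("2000-01-01", periods=3, freq="D")
    xr.infer_freq(cf_index)  # "D"

    # ... and falls back to pandas.infer_freq for numpy datetime64/timedelta64 data
    pd_index = pd.date_range("2000-01-01", periods=4, freq="6H")
    xr.infer_freq(pd_index)  # "6H"
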
    From 93b2d040ff17baffd1db976acf4e6cd0c8291045 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Tue, 2 Jun 2020 01:11:08 +0200
    Subject: [PATCH 041/342] update numpy's intersphinx url (#4117)
    
    ---
     doc/conf.py | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/doc/conf.py b/doc/conf.py
    index 5d304dab362..6b16468d29e 100644
    --- a/doc/conf.py
    +++ b/doc/conf.py
    @@ -351,7 +351,7 @@
         "python": ("https://docs.python.org/3/", None),
         "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
         "iris": ("https://scitools.org.uk/iris/docs/latest", None),
    -    "numpy": ("https://docs.scipy.org/doc/numpy", None),
    +    "numpy": ("https://numpy.org/doc/stable", None),
         "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
         "numba": ("https://numba.pydata.org/numba-doc/latest", None),
         "matplotlib": ("https://matplotlib.org", None),
    
    From 09df5ca4036d84620373fa4bccd11d1f1d4bec28 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Fri, 5 Jun 2020 11:45:59 -0400
    Subject: [PATCH 042/342] Allow non-unique and non-monotonic coordinates in
     get_clean_interp_index and polyfit (#4099)
    
    * Allow non-unique and non-monotonic in get_clean_interp_index and polyfit
    
    * black on missing.py
    
    * Apply change to polyval, add pr to whats new
    
    * Add tests for get_clean_interp_index return values
    ---
     doc/whats-new.rst            |  2 +-
     xarray/core/computation.py   |  2 +-
     xarray/core/dataset.py       |  2 +-
     xarray/core/missing.py       | 17 +++++++++++------
     xarray/tests/test_missing.py | 12 ++++++++++++
     5 files changed, 26 insertions(+), 9 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e8e30917cff..bf9347d46a2 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -50,7 +50,7 @@ New Features
       By `Andrew Williams `_ 
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
    -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
    +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
       By `Pascal Bourgault `_.
     - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
       :py:func:`combine_by_coords` and :py:func:`combine_nested` using
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 5e172ea29ab..cecd4fd8e70 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1506,7 +1506,7 @@ def polyval(coord, coeffs, degree_dim="degree"):
         from .dataarray import DataArray
         from .missing import get_clean_interp_index
     
    -    x = get_clean_interp_index(coord, coord.name)
    +    x = get_clean_interp_index(coord, coord.name, strict=False)
     
         deg_coord = coeffs[degree_dim]
     
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 2d0044711fe..d50c6e1951e 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5839,7 +5839,7 @@ def polyfit(
             variables = {}
             skipna_da = skipna
     
    -        x = get_clean_interp_index(self, dim)
    +        x = get_clean_interp_index(self, dim, strict=False)
             xname = "{}_".format(self[dim].name)
             order = int(deg) + 1
             lhs = np.vander(x, order)
    diff --git a/xarray/core/missing.py b/xarray/core/missing.py
    index 374eaec1fa7..59d4f777c73 100644
    --- a/xarray/core/missing.py
    +++ b/xarray/core/missing.py
    @@ -208,7 +208,9 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs):
         return ds
     
     
    -def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True):
    +def get_clean_interp_index(
    +    arr, dim: Hashable, use_coordinate: Union[str, bool] = True, strict: bool = True
    +):
         """Return index to use for x values in interpolation or curve fitting.
     
         Parameters
    @@ -221,6 +223,8 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
           If use_coordinate is True, the coordinate that shares the name of the
           dimension along which interpolation is being performed will be used as the
           x values. If False, the x values are set as an equally spaced sequence.
    +    strict : bool
    +      Whether to raise errors if the index is either non-unique or non-monotonic (default).
     
         Returns
         -------
    @@ -257,11 +261,12 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
         if isinstance(index, pd.MultiIndex):
             index.name = dim
     
    -    if not index.is_monotonic:
    -        raise ValueError(f"Index {index.name!r} must be monotonically increasing")
    +    if strict:
    +        if not index.is_monotonic:
    +            raise ValueError(f"Index {index.name!r} must be monotonically increasing")
     
    -    if not index.is_unique:
    -        raise ValueError(f"Index {index.name!r} has duplicate values")
    +        if not index.is_unique:
    +            raise ValueError(f"Index {index.name!r} has duplicate values")
     
         # Special case for non-standard calendar indexes
         # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds
    @@ -282,7 +287,7 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
             # xarray/numpy raise a ValueError
             raise TypeError(
                 f"Index {index.name!r} must be castable to float64 to support "
    -            f"interpolation, got {type(index).__name__}."
    +            f"interpolation or curve fitting, got {type(index).__name__}."
             )
     
         return index
    diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
    index 731cd165244..bc186c8bd15 100644
    --- a/xarray/tests/test_missing.py
    +++ b/xarray/tests/test_missing.py
    @@ -534,6 +534,18 @@ def test_get_clean_interp_index_potential_overflow():
         get_clean_interp_index(da, "time")
     
     
    +@pytest.mark.parametrize("index", ([0, 2, 1], [0, 1, 1]))
    +def test_get_clean_interp_index_strict(index):
    +    da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": index})
    +
    +    with pytest.raises(ValueError):
    +        get_clean_interp_index(da, "x")
    +
    +    clean = get_clean_interp_index(da, "x", strict=False)
    +    np.testing.assert_array_equal(index, clean)
    +    assert clean.dtype == np.float64
    +
    +
     @pytest.fixture
     def da_time():
         return xr.DataArray(
    
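    A minimal usage sketch of the new ``strict`` keyword, mirroring the test added above.
    ``get_clean_interp_index`` is an internal helper, so the direct import is for
    illustration only:

        import xarray as xr
        from xarray.core.missing import get_clean_interp_index

        # a dimension coordinate that is neither sorted nor guaranteed unique
        da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": [0, 2, 1]})

        # strict=True (the default) keeps the old behaviour:
        # get_clean_interp_index(da, "x")  # raises ValueError
        index = get_clean_interp_index(da, "x", strict=False)
        print(index.dtype)  # float64 -- checks skipped, values [0., 2., 1.]

    This is what allows ``polyfit`` and ``polyval`` to work with duplicate or unsorted
    coordinate values.
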
    From 274bd4b98235557f643eb110e77ee09c3c8689bc Mon Sep 17 00:00:00 2001
    From: Dave Cole 
    Date: Sat, 6 Jun 2020 05:32:49 +1000
    Subject: [PATCH 043/342] Fix open_rasterio() for WarpedVRT with specified
     src_crs (#4104)
    
    * Test open_rasterio() support of WarpedVRT with specified src_crs
    
    * Pass additional WarpedVRT params when recreating in open_rasterio()
    
    * Add description to `whats-new.rst`
    
    * Update doc/whats-new.rst
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst             |  2 ++
     xarray/backends/rasterio_.py  |  7 +++++--
     xarray/tests/test_backends.py | 13 +++++++++++++
     3 files changed, 20 insertions(+), 2 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index bf9347d46a2..0c5e61addf5 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -143,6 +143,8 @@ Bug fixes
       By `Mathias Hauser `_.
     - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`)
       By `Benoit Bovy `_.
    +- Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`)
    +  By `Dave Cole `_.
     
     Documentation
     ~~~~~~~~~~~~~
    diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py
    index 77beffd09b1..661d5b5c6fc 100644
    --- a/xarray/backends/rasterio_.py
    +++ b/xarray/backends/rasterio_.py
    @@ -221,14 +221,17 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
             vrt = filename
             filename = vrt.src_dataset.name
             vrt_params = dict(
    +            src_crs=vrt.src_crs.to_string(),
                 crs=vrt.crs.to_string(),
                 resampling=vrt.resampling,
    +            tolerance=vrt.tolerance,
                 src_nodata=vrt.src_nodata,
                 nodata=vrt.nodata,
    -            tolerance=vrt.tolerance,
    -            transform=vrt.transform,
                 width=vrt.width,
                 height=vrt.height,
    +            src_transform=vrt.src_transform,
    +            transform=vrt.transform,
    +            dtype=vrt.working_dtype,
                 warp_extras=vrt.warp_extras,
             )
     
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 49a39474b54..3642c1eb9b7 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -4160,6 +4160,19 @@ def test_rasterio_vrt_with_transform_and_size(self):
                             assert actual_shape == expected_shape
                             assert actual_transform == expected_transform
     
    +    def test_rasterio_vrt_with_src_crs(self):
    +        # Test open_rasterio() support of WarpedVRT with specified src_crs
    +        import rasterio
    +
    +        # create geotiff with no CRS and specify it manually
    +        with create_tmp_geotiff(crs=None) as (tmp_file, expected):
    +            src_crs = rasterio.crs.CRS({"init": "epsg:32618"})
    +            with rasterio.open(tmp_file) as src:
    +                assert src.crs is None
    +                with rasterio.vrt.WarpedVRT(src, src_crs=src_crs) as vrt:
    +                    with xr.open_rasterio(vrt) as da:
    +                        assert da.crs == src_crs
    +
         @network
         def test_rasterio_vrt_network(self):
             # Make sure loading w/ rasterio give same results as xarray
    
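    A short sketch of the fixed behaviour, following the new test; the file name and
    EPSG code below are placeholders:

        import rasterio
        import xarray as xr

        src_crs = rasterio.crs.CRS({"init": "epsg:32618"})
        # a GeoTIFF without a CRS; the CRS is supplied through the WarpedVRT
        with rasterio.open("no_crs.tif") as src:
            with rasterio.vrt.WarpedVRT(src, src_crs=src_crs) as vrt:
                # open_rasterio() now recreates the VRT with src_crs, src_transform
                # and the working dtype instead of silently dropping them
                with xr.open_rasterio(vrt) as da:
                    print(da.crs)
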
    From c07160dd2d627a021e58515cbd7753c11fb56d94 Mon Sep 17 00:00:00 2001
    From: Oriol Abril 
    Date: Fri, 5 Jun 2020 21:39:09 +0200
    Subject: [PATCH 044/342] keep attrs in reset_index (#4103)
    
    * keep attrs when resetting single index
    
    * add dataarray test
    
    * modify tests
    
    * remove rename
    
    * update what's new
    ---
     doc/whats-new.rst              | 10 ++++++----
     xarray/core/dataset.py         |  4 ++--
     xarray/tests/test_dataarray.py |  7 +++++++
     xarray/tests/test_dataset.py   |  7 +++++++
     4 files changed, 22 insertions(+), 6 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 0c5e61addf5..21eb28130c2 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,10 +36,12 @@ Breaking changes
     
     Enhancements
     ~~~~~~~~~~~~
    -- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` 
    -  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially 
    +- Performance improvement of :py:meth:`DataArray.interp` and :py:meth:`Dataset.interp`.
    +  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially
       rather than interpolating in multidimensional space. (:issue:`2223`)
       By `Keisuke Fujii `_.
    +- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
    +  coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
     
     New Features
     ~~~~~~~~~~~~
    @@ -47,7 +49,7 @@ New Features
       By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
       :py:meth:`Dataset.chunk`. (:issue:`4055`)
    -  By `Andrew Williams `_ 
    +  By `Andrew Williams `_
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
    @@ -77,7 +79,7 @@ New Features
       By `Stephan Hoyer `_.
     - Allow plotting of boolean arrays. (:pull:`3766`)
       By `Marek Jacob `_
    -- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`). 
    +- Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`).
       By `Mathias Hauser `_.
     - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
       the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index d50c6e1951e..191b57a667a 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -329,7 +329,7 @@ def split_indexes(
             else:
                 vars_to_remove.append(d)
                 if not drop:
    -                vars_to_create[str(d) + "_"] = Variable(d, index)
    +                vars_to_create[str(d) + "_"] = Variable(d, index, variables[d].attrs)
     
         for d, levs in dim_levels.items():
             index = variables[d].to_index()
    @@ -341,7 +341,7 @@ def split_indexes(
             if not drop:
                 for lev in levs:
                     idx = index.get_level_values(lev)
    -                vars_to_create[idx.name] = Variable(d, idx)
    +                vars_to_create[idx.name] = Variable(d, idx, variables[d].attrs)
     
         new_variables = dict(variables)
         for v in set(vars_to_remove):
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index 54a77261fb4..95f0ad9f612 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -1830,6 +1830,13 @@ def test_reset_index(self):
             expected = DataArray([1, 2], coords={"x_": ("x", ["a", "b"])}, dims="x")
             assert_identical(array.reset_index("x"), expected)
     
    +    def test_reset_index_keep_attrs(self):
    +        coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    +        da = DataArray([1, 0], [coord_1])
    +        expected = DataArray([1, 0], {"coord_1_": coord_1}, dims=["coord_1"])
    +        obj = da.reset_index("coord_1")
    +        assert_identical(expected, obj)
    +
         def test_reorder_levels(self):
             midx = self.mindex.reorder_levels(["level_2", "level_1"])
             expected = DataArray(self.mda.values, coords={"x": midx}, dims="x")
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 2a89920766c..fd04c8a7f64 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -2864,6 +2864,13 @@ def test_reset_index(self):
             with pytest.raises(TypeError):
                 ds.reset_index("x", inplace=True)
     
    +    def test_reset_index_keep_attrs(self):
    +        coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    +        ds = Dataset({}, {"coord_1": coord_1})
    +        expected = Dataset({}, {"coord_1_": coord_1})
    +        obj = ds.reset_index("coord_1")
    +        assert_identical(expected, obj)
    +
         def test_reorder_levels(self):
             ds = create_test_multiindex()
             mindex = ds["x"].to_index()
    
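    A small sketch of the behaviour change, following the added tests; the ``units``
    attribute is a placeholder:

        import xarray as xr

        coord_1 = xr.DataArray([1, 2], dims=["coord_1"], attrs={"units": "m"})
        da = xr.DataArray([1, 0], coords=[coord_1])

        reset = da.reset_index("coord_1")
        # the reset coordinate is renamed to "coord_1_" and now keeps its attrs
        print(reset.coords["coord_1_"].attrs)  # {'units': 'm'}
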
    From 2a288f6ed4286910fcf3ab9895e1e9cbd44d30b4 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Sun, 7 Jun 2020 16:13:34 +0000
    Subject: [PATCH 045/342] map_blocks: Allow passing dask-backed objects in args
     (#3818)
    
    * MVP for dask collections in args
    
    * Add tests.
    
    * Use list comprehension
    
    * map_blocks: preserve attrs of dimension coordinates in input
    
    Switch to use IndexVariables instead of Indexes so that attrs are preserved.
    
    * Check that chunk sizes are compatible.
    
    * Align all xarray objects
    
    * Add some type hints.
    
    * fix rebase
    
    * move _wrapper out
    
    * Fixes
    
    * avoid index dataarrays for simplicity.
    
    need a solution to preserve index attrs
    
    * Propagate attributes for index variables.
    
    * Propagate encoding for index variables.
    
    * Fix bug with reductions when template is provided.
    
    Indexes should just contain the indexes for the output variable. When template was
    provided, I was initializing indexes to contain all input indexes.
    It should just have the indexes from template. Otherwise indexes for
    any indexed dimensions removed by func will still be propagated.
    
    * more minimal fix.
    
    * minimize diff
    
    * Update docs.
    
    * Address joe comments.
    
    * docstring updates.
    
    * minor docstring change
    
    * minor.
    
    * remove useless check_shapes variable.
    
    * fix docstring
    ---
     doc/whats-new.rst         |   2 +
     xarray/core/dataarray.py  |  84 +++++++++++----
     xarray/core/dataset.py    |  83 +++++++++++----
     xarray/core/parallel.py   | 210 ++++++++++++++++++++++++--------------
     xarray/tests/test_dask.py |  59 ++++++++++-
     5 files changed, 324 insertions(+), 114 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 21eb28130c2..85e73e1b7e8 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -94,6 +94,8 @@ New Features
     - :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases
       where the result of a computation could not be inferred automatically.
       By `Deepak Cherian `_
    +- :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`)
    +  By `Deepak Cherian `_
     
     - Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`,
       (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`,
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 236938bac74..3451ff14c8f 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -3262,45 +3262,91 @@ def map_blocks(
             ----------
             func: callable
                 User-provided function that accepts a DataArray as its first
    -            parameter. The function will receive a subset, i.e. one block, of this DataArray
    -            (see below), corresponding to one chunk along each chunked dimension. ``func`` will be
    -            executed as ``func(block_subset, *args, **kwargs)``.
    +            parameter. The function will receive a subset or 'block' of this DataArray (see below),
    +            corresponding to one chunk along each chunked dimension. ``func`` will be
    +            executed as ``func(subset_dataarray, *subset_args, **kwargs)``.
     
                 This function must return either a single DataArray or a single Dataset.
     
                 This function cannot add a new chunked dimension.
    +
    +        obj: DataArray, Dataset
    +            Passed to the function as its first argument, one block at a time.
             args: Sequence
    -            Passed verbatim to func after unpacking, after the sliced DataArray. xarray
    -            objects, if any, will not be split by chunks. Passing dask collections is
    -            not allowed.
    +            Passed to func after unpacking and subsetting any xarray objects by blocks.
    +            xarray objects in args must be aligned with obj, otherwise an error is raised.
             kwargs: Mapping
                 Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -            split by chunks. Passing dask collections is not allowed.
    +            subset to blocks. Passing dask collections in kwargs is not allowed.
             template: (optional) DataArray, Dataset
                 xarray object representing the final result after compute is called. If not provided,
    -            the function will be first run on mocked-up data, that looks like 'obj' but
    +            the function will be first run on mocked-up data, that looks like ``obj`` but
                 has sizes 0, to determine properties of the returned object such as dtype,
    -            variable names, new dimensions and new indexes (if any).
    -            'template' must be provided if the function changes the size of existing dimensions.
    +            variable names, attributes, new dimensions and new indexes (if any).
    +            ``template`` must be provided if the function changes the size of existing dimensions.
    +            When provided, ``attrs`` on variables in ``template`` are copied over to the result. Any
    +            ``attrs`` set by ``func`` will be ignored.
    +
     
             Returns
             -------
    -        A single DataArray or Dataset with dask backend, reassembled from the outputs of
    -        the function.
    +        A single DataArray or Dataset with dask backend, reassembled from the outputs of the
    +        function.
     
             Notes
             -----
    -        This method is designed for when one needs to manipulate a whole xarray object
    -        within each chunk. In the more common case where one can work on numpy arrays,
    -        it is recommended to use apply_ufunc.
    +        This function is designed for when ``func`` needs to manipulate a whole xarray object
    +        subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +        recommended to use ``apply_ufunc``.
     
    -        If none of the variables in this DataArray is backed by dask, calling this
    -        method is equivalent to calling ``func(self, *args, **kwargs)``.
    +        If none of the variables in ``obj`` is backed by dask arrays, calling this function is
    +        equivalent to calling ``func(obj, *args, **kwargs)``.
     
             See Also
             --------
    -        dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks,
    -        xarray.Dataset.map_blocks
    +        dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks,
    +        xarray.DataArray.map_blocks
    +
    +        Examples
    +        --------
    +
    +        Calculate an anomaly from climatology using ``.groupby()``. Using
    +        ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``,
    +        its indices, and its methods like ``.groupby()``.
    +
    +        >>> def calculate_anomaly(da, groupby_type="time.month"):
    +        ...     gb = da.groupby(groupby_type)
    +        ...     clim = gb.mean(dim="time")
    +        ...     return gb - clim
    +        >>> time = xr.cftime_range("1990-01", "1992-01", freq="M")
    +        >>> np.random.seed(123)
    +        >>> array = xr.DataArray(
    +        ...     np.random.rand(len(time)), dims="time", coords=[time]
    +        ... ).chunk()
    +        >>> array.map_blocks(calculate_anomaly, template=array).compute()
    +        <xarray.DataArray (time: 24)>
    +        array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
    +                0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    +               -0.19063865,  0.0590131 , -0.12894847, -0.11323072,  0.0855964 ,
    +                0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108,
    +                0.07673453,  0.22865714,  0.19063865, -0.0590131 ])
    +        Coordinates:
    +          * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
    +
    +        Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments
    +        to the function being applied in ``xr.map_blocks()``:
    +
    +        >>> array.map_blocks(
    +        ...     calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array,
    +        ... )
    +        <xarray.DataArray (time: 24)>
    +        array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    +               -0.11974531,  0.43791243,  0.14197797, -0.06191987, -0.15073425,
    +               -0.19967375,  0.18619794, -0.05100474, -0.42989909, -0.09153273,
    +                0.24841842, -0.30708526, -0.31412523,  0.04197439,  0.0422506 ,
    +                0.14482397,  0.35985481,  0.23487834,  0.12144652])
    +        Coordinates:
    +            * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
             """
             from .parallel import map_blocks
     
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 191b57a667a..a8011afd3e3 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5733,45 +5733,92 @@ def map_blocks(
             ----------
             func: callable
                 User-provided function that accepts a Dataset as its first
    -            parameter. The function will receive a subset, i.e. one block, of this Dataset
    -            (see below), corresponding to one chunk along each chunked dimension. ``func`` will be
    -            executed as ``func(block_subset, *args, **kwargs)``.
    +            parameter. The function will receive a subset or 'block' of this Dataset (see below),
    +            corresponding to one chunk along each chunked dimension. ``func`` will be
    +            executed as ``func(subset_dataset, *subset_args, **kwargs)``.
     
                 This function must return either a single DataArray or a single Dataset.
     
                 This function cannot add a new chunked dimension.
    +
    +        obj: DataArray, Dataset
    +            Passed to the function as its first argument, one block at a time.
             args: Sequence
    -            Passed verbatim to func after unpacking, after the sliced DataArray. xarray
    -            objects, if any, will not be split by chunks. Passing dask collections is
    -            not allowed.
    +            Passed to func after unpacking and subsetting any xarray objects by blocks.
    +            xarray objects in args must be aligned with obj, otherwise an error is raised.
             kwargs: Mapping
                 Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -            split by chunks. Passing dask collections is not allowed.
    +            subset to blocks. Passing dask collections in kwargs is not allowed.
             template: (optional) DataArray, Dataset
                 xarray object representing the final result after compute is called. If not provided,
    -            the function will be first run on mocked-up data, that looks like 'obj' but
    +            the function will be first run on mocked-up data, that looks like ``obj`` but
                 has sizes 0, to determine properties of the returned object such as dtype,
    -            variable names, new dimensions and new indexes (if any).
    -            'template' must be provided if the function changes the size of existing dimensions.
    +            variable names, attributes, new dimensions and new indexes (if any).
    +            ``template`` must be provided if the function changes the size of existing dimensions.
    +            When provided, ``attrs`` on variables in ``template`` are copied over to the result. Any
    +            ``attrs`` set by ``func`` will be ignored.
    +
     
             Returns
             -------
    -        A single DataArray or Dataset with dask backend, reassembled from the outputs of
    -        the function.
    +        A single DataArray or Dataset with dask backend, reassembled from the outputs of the
    +        function.
     
             Notes
             -----
    -        This method is designed for when one needs to manipulate a whole xarray object
    -        within each chunk. In the more common case where one can work on numpy arrays,
    -        it is recommended to use apply_ufunc.
    +        This function is designed for when ``func`` needs to manipulate a whole xarray object
    +        subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +        recommended to use ``apply_ufunc``.
     
    -        If none of the variables in this Dataset is backed by dask, calling this method
    -        is equivalent to calling ``func(self, *args, **kwargs)``.
    +        If none of the variables in ``obj`` is backed by dask arrays, calling this function is
    +        equivalent to calling ``func(obj, *args, **kwargs)``.
     
             See Also
             --------
    -        dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks,
    +        dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks,
             xarray.DataArray.map_blocks
    +
    +        Examples
    +        --------
    +
    +        Calculate an anomaly from climatology using ``.groupby()``. Using
    +        ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``,
    +        its indices, and its methods like ``.groupby()``.
    +
    +        >>> def calculate_anomaly(da, groupby_type="time.month"):
    +        ...     gb = da.groupby(groupby_type)
    +        ...     clim = gb.mean(dim="time")
    +        ...     return gb - clim
    +        >>> time = xr.cftime_range("1990-01", "1992-01", freq="M")
    +        >>> np.random.seed(123)
    +        >>> array = xr.DataArray(
    +        ...     np.random.rand(len(time)), dims="time", coords=[time]
    +        ... ).chunk()
    +        >>> ds = xr.Dataset({"a": array})
    +        >>> ds.map_blocks(calculate_anomaly, template=ds).compute()
    +        
    +        array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
    +                0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    +               -0.19063865,  0.0590131 , -0.12894847, -0.11323072,  0.0855964 ,
    +                0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108,
    +                0.07673453,  0.22865714,  0.19063865, -0.0590131 ])
    +        Coordinates:
    +          * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
    +
    +        Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments
    +        to the function being applied in ``xr.map_blocks()``:
    +
    +        >>> ds.map_blocks(
    +        ...     calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds,
    +        ... )
    +        
    +        array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    +               -0.11974531,  0.43791243,  0.14197797, -0.06191987, -0.15073425,
    +               -0.19967375,  0.18619794, -0.05100474, -0.42989909, -0.09153273,
    +                0.24841842, -0.30708526, -0.31412523,  0.04197439,  0.0422506 ,
    +                0.14482397,  0.35985481,  0.23487834,  0.12144652])
    +        Coordinates:
    +            * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
             """
             from .parallel import map_blocks
     
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index d91dfb4a275..522c5b36ff5 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -16,6 +16,8 @@
         DefaultDict,
         Dict,
         Hashable,
    +    Iterable,
    +    List,
         Mapping,
         Sequence,
         Tuple,
    @@ -25,12 +27,29 @@
     
     import numpy as np
     
    +from .alignment import align
     from .dataarray import DataArray
     from .dataset import Dataset
     
     T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset)
     
     
    +def to_object_array(iterable):
    +    # using empty_like calls compute
    +    npargs = np.empty((len(iterable),), dtype=np.object)
    +    npargs[:] = iterable
    +    return npargs
    +
    +
    +def assert_chunks_compatible(a: Dataset, b: Dataset):
    +    a = a.unify_chunks()
    +    b = b.unify_chunks()
    +
    +    for dim in set(a.chunks).intersection(set(b.chunks)):
    +        if a.chunks[dim] != b.chunks[dim]:
    +            raise ValueError(f"Chunk sizes along dimension {dim!r} are not equal.")
    +
    +
     def check_result_variables(
         result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str
     ):
    @@ -67,6 +86,17 @@ def dataset_to_dataarray(obj: Dataset) -> DataArray:
         return next(iter(obj.data_vars.values()))
     
     
    +def dataarray_to_dataset(obj: DataArray) -> Dataset:
    +    # only using _to_temp_dataset would break
    +    # func = lambda x: x.to_dataset()
    +    # since that relies on preserving name.
    +    if obj.name is None:
    +        dataset = obj._to_temp_dataset()
    +    else:
    +        dataset = obj.to_dataset()
    +    return dataset
    +
    +
     def make_meta(obj):
         """If obj is a DataArray or Dataset, return a new object of the same type and with
         the same variables and dtypes, but where all variables have size 0 and numpy
    @@ -150,30 +180,30 @@ def map_blocks(
         ----------
         func: callable
             User-provided function that accepts a DataArray or Dataset as its first
    -        parameter. The function will receive a subset of 'obj' (see below),
    +        parameter ``obj``. The function will receive a subset or 'block' of ``obj`` (see below),
             corresponding to one chunk along each chunked dimension. ``func`` will be
    -        executed as ``func(obj_subset, *args, **kwargs)``.
    +        executed as ``func(subset_obj, *subset_args, **kwargs)``.
     
             This function must return either a single DataArray or a single Dataset.
     
             This function cannot add a new chunked dimension.
     
         obj: DataArray, Dataset
    -        Passed to the function as its first argument, one dask chunk at a time.
    +        Passed to the function as its first argument, one block at a time.
         args: Sequence
    -        Passed verbatim to func after unpacking, after the sliced obj. xarray objects,
    -        if any, will not be split by chunks. Passing dask collections is not allowed.
    +        Passed to func after unpacking and subsetting any xarray objects by blocks.
    +        xarray objects in args must be aligned with obj, otherwise an error is raised.
         kwargs: Mapping
             Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -        split by chunks. Passing dask collections is not allowed.
    +        subset to blocks. Passing dask collections in kwargs is not allowed.
         template: (optional) DataArray, Dataset
             xarray object representing the final result after compute is called. If not provided,
    -        the function will be first run on mocked-up data, that looks like 'obj' but
    +        the function will be first run on mocked-up data, that looks like ``obj`` but
             has sizes 0, to determine properties of the returned object such as dtype,
             variable names, attributes, new dimensions and new indexes (if any).
    -        'template' must be provided if the function changes the size of existing dimensions.
    -        When provided, `attrs` on variables in `template` are copied over to the result. Any
    -        `attrs` set by `func` will be ignored.
    +        ``template`` must be provided if the function changes the size of existing dimensions.
    +        When provided, ``attrs`` on variables in ``template`` are copied over to the result. Any
    +        ``attrs`` set by ``func`` will be ignored.
     
     
         Returns
    @@ -183,11 +213,11 @@ def map_blocks(
     
         Notes
         -----
    -    This function is designed for when one needs to manipulate a whole xarray object
    -    within each chunk. In the more common case where one can work on numpy arrays, it is
    -    recommended to use apply_ufunc.
    +    This function is designed for when ``func`` needs to manipulate a whole xarray object
    +    subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +    recommended to use ``apply_ufunc``.
     
    -    If none of the variables in obj is backed by dask, calling this function is
    +    If none of the variables in ``obj`` is backed by dask arrays, calling this function is
         equivalent to calling ``func(obj, *args, **kwargs)``.
     
         See Also
    @@ -203,10 +233,6 @@ def map_blocks(
         its indices, and its methods like ``.groupby()``.
     
         >>> def calculate_anomaly(da, groupby_type="time.month"):
    -    ...     # Necessary workaround to xarray's check with zero dimensions
    -    ...     # https://github.com/pydata/xarray/issues/3575
    -    ...     if sum(da.shape) == 0:
    -    ...         return da
         ...     gb = da.groupby(groupby_type)
         ...     clim = gb.mean(dim="time")
         ...     return gb - clim
    @@ -215,7 +241,7 @@ def map_blocks(
         >>> array = xr.DataArray(
         ...     np.random.rand(len(time)), dims="time", coords=[time]
         ... ).chunk()
    -    >>> xr.map_blocks(calculate_anomaly, array).compute()
    +    >>> xr.map_blocks(calculate_anomaly, array, template=array).compute()
         
         array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
                 0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    @@ -229,7 +255,7 @@ def map_blocks(
         to the function being applied in ``xr.map_blocks()``:
     
         >>> xr.map_blocks(
    -    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"},
    +    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, template=array,
         ... )
         
         array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    @@ -241,14 +267,24 @@ def map_blocks(
             * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
         """
     
    -    def _wrapper(func, obj, to_array, args, kwargs, expected):
    -        check_shapes = dict(obj.dims)
    -        check_shapes.update(expected["shapes"])
    -
    -        if to_array:
    -            obj = dataset_to_dataarray(obj)
    -
    -        result = func(obj, *args, **kwargs)
    +    def _wrapper(
    +        func: Callable,
    +        args: List,
    +        kwargs: dict,
    +        arg_is_array: Iterable[bool],
    +        expected: dict,
    +    ):
    +        """
    +        Wrapper function that receives datasets in args; converts to dataarrays when necessary;
    +        passes these to the user function `func` and checks returned objects for expected shapes/sizes/etc.
    +        """
    +
    +        converted_args = [
    +            dataset_to_dataarray(arg) if is_array else arg
    +            for is_array, arg in zip(arg_is_array, args)
    +        ]
    +
    +        result = func(*converted_args, **kwargs)
     
             # check all dims are present
             missing_dimensions = set(expected["shapes"]) - set(result.sizes)
    @@ -259,10 +295,10 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
     
             # check that index lengths and values are as expected
             for name, index in result.indexes.items():
    -            if name in check_shapes:
    -                if len(index) != check_shapes[name]:
    +            if name in expected["shapes"]:
    +                if len(index) != expected["shapes"][name]:
                         raise ValueError(
    -                        f"Received dimension {name!r} of length {len(index)}. Expected length {check_shapes[name]}."
    +                        f"Received dimension {name!r} of length {len(index)}. Expected length {expected['shapes'][name]}."
                         )
                 if name in expected["indexes"]:
                     expected_index = expected["indexes"][name]
    @@ -289,38 +325,44 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
         elif not isinstance(kwargs, Mapping):
             raise TypeError("kwargs must be a mapping (for example, a dict)")
     
    -    for value in list(args) + list(kwargs.values()):
    +    for value in kwargs.values():
             if dask.is_dask_collection(value):
                 raise TypeError(
    -                "Cannot pass dask collections in args or kwargs yet. Please compute or "
    +                "Cannot pass dask collections in kwargs yet. Please compute or "
                     "load values before passing to map_blocks."
                 )
     
         if not dask.is_dask_collection(obj):
             return func(obj, *args, **kwargs)
     
    -    if isinstance(obj, DataArray):
    -        # only using _to_temp_dataset would break
    -        # func = lambda x: x.to_dataset()
    -        # since that relies on preserving name.
    -        if obj.name is None:
    -            dataset = obj._to_temp_dataset()
    -        else:
    -            dataset = obj.to_dataset()
    -        input_is_array = True
    -    else:
    -        dataset = obj
    -        input_is_array = False
    +    npargs = to_object_array([obj] + list(args))
    +    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in npargs]
    +    is_array = [isinstance(arg, DataArray) for arg in npargs]
    +
    +    # all xarray objects must be aligned. This is consistent with apply_ufunc.
    +    aligned = align(*npargs[is_xarray], join="exact")
    +    # assigning to object arrays works better when RHS is object array
    +    # https://stackoverflow.com/questions/43645135/boolean-indexing-assignment-of-a-numpy-array-to-a-numpy-array
    +    npargs[is_xarray] = to_object_array(aligned)
    +    npargs[is_array] = to_object_array(
    +        [dataarray_to_dataset(da) for da in npargs[is_array]]
    +    )
    +
    +    # check that chunk sizes are compatible
    +    input_chunks = dict(npargs[0].chunks)
    +    input_indexes = dict(npargs[0].indexes)
    +    for arg in npargs[1:][is_xarray[1:]]:
    +        assert_chunks_compatible(npargs[0], arg)
    +        input_chunks.update(arg.chunks)
    +        input_indexes.update(arg.indexes)
     
    -    input_chunks = dataset.chunks
    -    dataset_indexes = set(dataset.indexes)
         if template is None:
             # infer template by providing zero-shaped arrays
    -        template = infer_template(func, obj, *args, **kwargs)
    +        template = infer_template(func, aligned[0], *args, **kwargs)
             template_indexes = set(template.indexes)
    -        preserved_indexes = template_indexes & dataset_indexes
    -        new_indexes = template_indexes - dataset_indexes
    -        indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes}
    +        preserved_indexes = template_indexes & set(input_indexes)
    +        new_indexes = template_indexes - set(input_indexes)
    +        indexes = {dim: input_indexes[dim] for dim in preserved_indexes}
             indexes.update({k: template.indexes[k] for k in new_indexes})
             output_chunks = {
                 dim: input_chunks[dim] for dim in template.dims if dim in input_chunks
    @@ -328,13 +370,11 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
     
         else:
             # template xarray object has been provided with proper sizes and chunk shapes
    -        template_indexes = set(template.indexes)
    -        indexes = {dim: dataset.indexes[dim] for dim in dataset_indexes}
    -        indexes.update({k: template.indexes[k] for k in template_indexes})
    +        indexes = dict(template.indexes)
             if isinstance(template, DataArray):
                 output_chunks = dict(zip(template.dims, template.chunks))  # type: ignore
             else:
    -            output_chunks = template.chunks  # type: ignore
    +            output_chunks = dict(template.chunks)
     
         for dim in output_chunks:
             if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]):
    @@ -363,7 +403,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
         graph: Dict[Any, Any] = {}
         new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict)
         gname = "{}-{}".format(
    -        dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs)
    +        dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs)
         )
     
         # map dims to list of chunk indexes
    @@ -376,9 +416,14 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items()
         }
     
    -    # iterate over all possible chunk combinations
    -    for v in itertools.product(*ichunk.values()):
    -        chunk_index = dict(zip(dataset.dims, v))
    +    def subset_dataset_to_block(
    +        graph: dict, gname: str, dataset: Dataset, input_chunk_bounds, chunk_index
    +    ):
    +        """
    +        Creates a task that subsets an xarray dataset to a block determined by chunk_index.
    +        Block extents are determined by input_chunk_bounds.
    +        Also creates subtasks that subset the constituent variables of a dataset.
    +        """
     
             # this will become [[name1, variable1],
             #                   [name2, variable2],
    @@ -387,6 +432,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             data_vars = []
             coords = []
     
    +        chunk_tuple = tuple(chunk_index.values())
             for name, variable in dataset.variables.items():
                 # make a task that creates tuple of (dims, chunk)
                 if dask.is_dask_collection(variable.data):
    @@ -395,13 +441,13 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                     for dim in variable.dims:
                         chunk = chunk[chunk_index[dim]]
     
    -                chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + v
    +                chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + chunk_tuple
                     graph[chunk_variable_task] = (
                         tuple,
                         [variable.dims, chunk, variable.attrs],
                     )
                 else:
    -                # non-dask array with possibly chunked dimensions
    +                # non-dask array possibly with dimensions chunked on other variables
                     # index into variable appropriately
                     subsetter = {
                         dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds)
    @@ -410,7 +456,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                     subset = variable.isel(subsetter)
                     chunk_variable_task = (
                         "{}-{}".format(gname, dask.base.tokenize(subset)),
    -                ) + v
    +                ) + chunk_tuple
                     graph[chunk_variable_task] = (
                         tuple,
                         [subset.dims, subset, subset.attrs],
    @@ -422,7 +468,22 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 else:
                     data_vars.append([name, chunk_variable_task])
     
    -        # expected["shapes", "coords", "data_vars", "indexes"] are used to raise nice error messages in _wrapper
    +        return (Dataset, (dict, data_vars), (dict, coords), dataset.attrs)
    +
    +    # iterate over all possible chunk combinations
    +    for chunk_tuple in itertools.product(*ichunk.values()):
    +        # mapping from dimension name to chunk index
    +        chunk_index = dict(zip(ichunk.keys(), chunk_tuple))
    +
    +        blocked_args = [
    +            subset_dataset_to_block(graph, gname, arg, input_chunk_bounds, chunk_index)
    +            if isxr
    +            else arg
    +            for isxr, arg in zip(is_xarray, npargs)
    +        ]
    +
    +        # expected["shapes", "coords", "data_vars", "indexes"] are used to
    +        # raise nice error messages in _wrapper
             expected = {}
             # input chunk 0 along a dimension maps to output chunk 0 along the same dimension
             # even if length of dimension is changed by the applied function
    @@ -436,16 +497,8 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 for dim in indexes
             }
     
    -        from_wrapper = (gname,) + v
    -        graph[from_wrapper] = (
    -            _wrapper,
    -            func,
    -            (Dataset, (dict, data_vars), (dict, coords), dataset.attrs),
    -            input_is_array,
    -            args,
    -            kwargs,
    -            expected,
    -        )
    +        from_wrapper = (gname,) + chunk_tuple
    +        graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected)
     
             # mapping from variable name to dask graph key
             var_key_map: Dict[Hashable, str] = {}
    @@ -472,7 +525,11 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 # layer.
                 new_layers[gname_l][key] = (operator.getitem, from_wrapper, name)
     
    -    hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
    +    hlg = HighLevelGraph.from_collections(
    +        gname,
    +        graph,
    +        dependencies=[arg for arg in npargs if dask.is_dask_collection(arg)],
    +    )
     
         for gname_l, layer in new_layers.items():
             # This adds in the getitems for each variable in the dataset.
    @@ -480,6 +537,10 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             hlg.layers[gname_l] = layer
     
         result = Dataset(coords=indexes, attrs=template.attrs)
    +    for index in result.indexes:
    +        result[index].attrs = template[index].attrs
    +        result[index].encoding = template[index].encoding
    +
         for name, gname_l in var_key_map.items():
             dims = template[name].dims
             var_chunks = []
    @@ -496,6 +557,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
             )
             result[name] = (dims, data, template[name].attrs)
    +        result[name].encoding = template[name].encoding
     
         result = result.set_coords(template._coord_names)
     
    diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
    index 6f714fe1825..caeb7ad4dc8 100644
    --- a/xarray/tests/test_dask.py
    +++ b/xarray/tests/test_dask.py
    @@ -972,6 +972,7 @@ def make_da():
             coords={"x": np.arange(10), "y": np.arange(100, 120)},
             name="a",
         ).chunk({"x": 4, "y": 5})
    +    da.x.attrs["long_name"] = "x"
         da.attrs["test"] = "test"
         da.coords["c2"] = 0.5
         da.coords["ndcoord"] = da.x * 2
    @@ -995,6 +996,9 @@ def make_ds():
         map_ds.attrs["test"] = "test"
         map_ds.coords["xx"] = map_ds["a"] * map_ds.y
     
    +    map_ds.x.attrs["long_name"] = "x"
    +    map_ds.y.attrs["long_name"] = "y"
    +
         return map_ds
     
     
    @@ -1074,9 +1078,6 @@ def really_bad_func(darray):
         with raises_regex(ValueError, "inconsistent chunks"):
             xr.map_blocks(bad_func, ds_copy)
     
    -    with raises_regex(TypeError, "Cannot pass dask collections"):
    -        xr.map_blocks(bad_func, map_da, args=[map_da.chunk()])
    -
         with raises_regex(TypeError, "Cannot pass dask collections"):
             xr.map_blocks(bad_func, map_da, kwargs=dict(a=map_da.chunk()))
     
    @@ -1103,6 +1104,58 @@ def test_map_blocks_convert_args_to_list(obj):
         assert_identical(actual, expected)
     
     
    +def test_map_blocks_dask_args():
    +    da1 = xr.DataArray(
    +        np.ones((10, 20)),
    +        dims=["x", "y"],
    +        coords={"x": np.arange(10), "y": np.arange(20)},
    +    ).chunk({"x": 5, "y": 4})
    +
    +    # check that block shapes are the same
    +    def sumda(da1, da2):
    +        assert da1.shape == da2.shape
    +        return da1 + da2
    +
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(sumda, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    # one dimension in common
    +    da2 = (da1 + 1).isel(x=1, drop=True)
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(operator.add, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    # test that everything works when dimension names are different
    +    da2 = (da1 + 1).isel(x=1, drop=True).rename({"y": "k"})
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(operator.add, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    with raises_regex(ValueError, "Chunk sizes along dimension 'x'"):
    +        xr.map_blocks(operator.add, da1, args=[da1.chunk({"x": 1})])
    +
    +    with raises_regex(ValueError, "indexes along dimension 'x' are not equal"):
    +        xr.map_blocks(operator.add, da1, args=[da1.reindex(x=np.arange(20))])
    +
    +    # reduction
    +    da1 = da1.chunk({"x": -1})
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(lambda a, b: (a + b).sum("x"), da1, args=[da2])
    +    xr.testing.assert_equal((da1 + da2).sum("x"), mapped)
    +
    +    # reduction with template
    +    da1 = da1.chunk({"x": -1})
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(
    +            lambda a, b: (a + b).sum("x"), da1, args=[da2], template=da1.sum("x")
    +        )
    +    xr.testing.assert_equal((da1 + da2).sum("x"), mapped)
    +
    +
     @pytest.mark.parametrize("obj", [make_da(), make_ds()])
     def test_map_blocks_add_attrs(obj):
         def add_attrs(obj):
    
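    A brief sketch of the new capability, following ``test_map_blocks_dask_args``:

        import numpy as np
        import xarray as xr

        da1 = xr.DataArray(
            np.ones((10, 20)),
            dims=["x", "y"],
            coords={"x": np.arange(10), "y": np.arange(20)},
        ).chunk({"x": 5, "y": 4})
        da2 = da1 + 1  # also dask-backed, with matching chunks

        # dask-backed xarray objects in ``args`` are now subset block by block
        # alongside the first argument instead of raising a TypeError
        mapped = xr.map_blocks(lambda a, b: a + b, da1, args=[da2])
        xr.testing.assert_equal(da1 + da2, mapped)
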
    From 4071125feedee690364272e8fde9b94866f85bc7 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Thu, 11 Jun 2020 04:14:48 +0200
    Subject: [PATCH 046/342] Fix the upstream-dev pandas build failure (#4138)
    
    * use the scipy-wheels-nightly repository for numpy, scipy and pandas
    
    * update the repository url
    
    * avoid installing over the conda packages
    
    * use the repository as a package index
    
    * run the uninstall with -y
    
    * use the correct url for the scipy-wheels-nightly repository
    ---
     ci/azure/install.yml | 27 ++++++++++++++++++++++-----
     1 file changed, 22 insertions(+), 5 deletions(-)
    
    diff --git a/ci/azure/install.yml b/ci/azure/install.yml
    index 60559dd2064..eff229e863a 100644
    --- a/ci/azure/install.yml
    +++ b/ci/azure/install.yml
    @@ -12,14 +12,32 @@ steps:
     
     - bash: |
         source activate xarray-tests
    +    conda uninstall -y --force \
    +        numpy \
    +        scipy \
    +        pandas \
    +        matplotlib \
    +        dask \
    +        distributed \
    +        zarr \
    +        cftime \
    +        rasterio \
    +        pint \
    +        bottleneck
         python -m pip install \
    -        -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \
    +        -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \
             --no-deps \
             --pre \
             --upgrade \
    -        matplotlib \
             numpy \
    -        scipy
    +        scipy \
    +        pandas
    +    python -m pip install \
    +        -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \
    +        --no-deps \
    +        --pre \
    +        --upgrade \
    +        matplotlib
         python -m pip install \
             --no-deps \
             --upgrade \
    @@ -29,8 +47,7 @@ steps:
             git+https://github.com/Unidata/cftime \
             git+https://github.com/mapbox/rasterio \
             git+https://github.com/hgrecco/pint \
    -        git+https://github.com/pydata/bottleneck \
    -        git+https://github.com/pandas-dev/pandas
    +        git+https://github.com/pydata/bottleneck
       condition: eq(variables['UPSTREAM_DEV'], 'true')
       displayName: Install upstream dev dependencies
     
    
    From 8f688ea92ae8416ecc3e18f6e060dad16960e9ac Mon Sep 17 00:00:00 2001
    From: Spencer Clark 
    Date: Thu, 11 Jun 2020 19:23:28 -0400
    Subject: [PATCH 047/342] Remove outdated note from datetime accessor docstring
     (#4148)
    
    ---
     xarray/core/accessor_dt.py | 6 ------
     1 file changed, 6 deletions(-)
    
    diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py
    index 2977596036c..630aaee142f 100644
    --- a/xarray/core/accessor_dt.py
    +++ b/xarray/core/accessor_dt.py
    @@ -240,12 +240,6 @@ class DatetimeAccessor(Properties):
         Fields can be accessed through the `.dt` attribute
         for applicable DataArrays.
     
    -    Notes
    -    ------
    -    Note that these fields are not calendar-aware; if your datetimes are encoded
    -    with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
    -    then some fields like `dayofyear` may not be accurate.
    -
         Examples
         ---------
         >>> import xarray as xr
    
    From 59a239710e0510f0cad28c7a521d8827a6633c36 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Fri, 12 Jun 2020 17:03:19 +0200
    Subject: [PATCH 048/342] speed up map_blocks (#4149)
    
    * replace the object array with generator expressions and zip/enumerate
    
    * remove a leftover grouping pair of parentheses
    
    * reuse is_array instead of comparing again
    ---
     xarray/core/parallel.py | 37 ++++++++++++++++++++++---------------
     1 file changed, 22 insertions(+), 15 deletions(-)
    
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index 522c5b36ff5..3a77753d0d1 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -34,11 +34,8 @@
     T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset)
     
     
    -def to_object_array(iterable):
    -    # using empty_like calls compute
    -    npargs = np.empty((len(iterable),), dtype=np.object)
    -    npargs[:] = iterable
    -    return npargs
    +def unzip(iterable):
    +    return zip(*iterable)
     
     
     def assert_chunks_compatible(a: Dataset, b: Dataset):
    @@ -335,23 +332,33 @@ def _wrapper(
         if not dask.is_dask_collection(obj):
             return func(obj, *args, **kwargs)
     
    -    npargs = to_object_array([obj] + list(args))
    -    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in npargs]
    -    is_array = [isinstance(arg, DataArray) for arg in npargs]
    +    all_args = [obj] + list(args)
    +    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in all_args]
    +    is_array = [isinstance(arg, DataArray) for arg in all_args]
    +
    +    # there should be a better way to group this. partition?
    +    xarray_indices, xarray_objs = unzip(
    +        (index, arg) for index, arg in enumerate(all_args) if is_xarray[index]
    +    )
    +    others = [
    +        (index, arg) for index, arg in enumerate(all_args) if not is_xarray[index]
    +    ]
     
         # all xarray objects must be aligned. This is consistent with apply_ufunc.
    -    aligned = align(*npargs[is_xarray], join="exact")
    -    # assigning to object arrays works better when RHS is object array
    -    # https://stackoverflow.com/questions/43645135/boolean-indexing-assignment-of-a-numpy-array-to-a-numpy-array
    -    npargs[is_xarray] = to_object_array(aligned)
    -    npargs[is_array] = to_object_array(
    -        [dataarray_to_dataset(da) for da in npargs[is_array]]
    +    aligned = align(*xarray_objs, join="exact")
    +    xarray_objs = tuple(
    +        dataarray_to_dataset(arg) if is_da else arg
    +        for is_da, arg in zip(is_array, aligned)
    +    )
    +
    +    _, npargs = unzip(
    +        sorted(list(zip(xarray_indices, xarray_objs)) + others, key=lambda x: x[0])
         )
     
         # check that chunk sizes are compatible
         input_chunks = dict(npargs[0].chunks)
         input_indexes = dict(npargs[0].indexes)
    -    for arg in npargs[1:][is_xarray[1:]]:
    +    for arg in xarray_objs[1:]:
             assert_chunks_compatible(npargs[0], arg)
             input_chunks.update(arg.chunks)
             input_indexes.update(arg.indexes)
    
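    A toy, pure-Python illustration of the unzip/enumerate grouping pattern introduced
    here; the argument values are placeholders rather than xarray objects:

        def unzip(iterable):
            return zip(*iterable)

        all_args = ["dataset-like", 3, "dataarray-like"]
        is_xarray = [True, False, True]

        # split into xarray / non-xarray groups, remembering original positions
        xarray_indices, xarray_objs = unzip(
            (i, arg) for i, arg in enumerate(all_args) if is_xarray[i]
        )
        others = [(i, arg) for i, arg in enumerate(all_args) if not is_xarray[i]]

        # ... align / convert the xarray objects here ...

        # then restore the original argument order
        _, npargs = unzip(sorted(list(zip(xarray_indices, xarray_objs)) + others))
        print(list(npargs))  # ['dataset-like', 3, 'dataarray-like']
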
    From 48fbee08711bf01a4de9a822e0721608f7dd3093 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Fri, 12 Jun 2020 17:03:55 +0200
    Subject: [PATCH 049/342] parameter documentation for DataArray.sel (#4150)
    
    * copy the parameter documentation of Dataset.sel to DataArray.sel
    
    * reflow the return value documentation
    
    * update whats-new.rst
    ---
     doc/whats-new.rst        |  4 +++-
     xarray/core/dataarray.py | 52 ++++++++++++++++++++++++++++++++++++++++
     2 files changed, 55 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 85e73e1b7e8..68b2d738073 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -172,8 +172,10 @@ Documentation
       By `Matthias Riße `_.
     - Apply ``black`` to all the code in the documentation (:pull:`4012`)
       By `Justus Magin `_.
    -- Narrative documentation now describes :py:meth:`map_blocks`. :ref:`dask.automatic-parallelization`.
    +- Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`.
       By `Deepak Cherian `_.
    +- Add documentation for the parameters and return values of :py:meth:`DataArray.sel`.
    +  By `Justus Magin `_.
     
     Internal Changes
     ~~~~~~~~~~~~~~~~
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 3451ff14c8f..44773e36e30 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -1076,6 +1076,19 @@ def sel(
             """Return a new DataArray whose data is given by selecting index
             labels along the specified dimension(s).
     
    +        In contrast to `DataArray.isel`, indexers for this method should use
    +        labels instead of integers.
    +
    +        Under the hood, this method is powered by using pandas's powerful Index
    +        objects. This makes label based indexing essentially just as fast as
    +        using integer indexing.
    +
    +        It also means this method uses pandas's (well documented) logic for
    +        indexing. This means you can use string shortcuts for datetime indexes
    +        (e.g., '2000-01' to select all values in January 2000). It also means
    +        that slices are treated as inclusive of both the start and stop values,
    +        unlike normal Python indexing.
    +
             .. warning::
     
               Do not try to assign values when using any of the indexing methods
    @@ -1088,6 +1101,45 @@ def sel(
               Assigning values with the chained indexing using ``.sel`` or
               ``.isel`` fails silently.
     
    +        Parameters
    +        ----------
    +        indexers : dict, optional
    +            A dict with keys matching dimensions and values given
    +            by scalars, slices or arrays of tick labels. For dimensions with
    +            multi-index, the indexer may also be a dict-like object with keys
    +            matching index level names.
    +            If DataArrays are passed as indexers, xarray-style indexing will be
    +            carried out. See :ref:`indexing` for the details.
    +            One of indexers or indexers_kwargs must be provided.
    +        method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
    +            Method to use for inexact matches:
    +
    +            * None (default): only exact matches
    +            * pad / ffill: propagate last valid index value forward
    +            * backfill / bfill: propagate next valid index value backward
    +            * nearest: use nearest valid index value
    +        tolerance : optional
    +            Maximum distance between original and new labels for inexact
    +            matches. The values of the index at the matching locations must
    +            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
    +        drop : bool, optional
    +            If ``drop=True``, drop coordinates variables in `indexers` instead
    +            of making them scalar.
    +        **indexers_kwargs : {dim: indexer, ...}, optional
    +            The keyword arguments form of ``indexers``.
    +            One of indexers or indexers_kwargs must be provided.
    +
    +        Returns
    +        -------
    +        obj : DataArray
    +            A new DataArray with the same contents as this DataArray, except the
    +            data and each dimension is indexed by the appropriate indexers.
    +            If indexer DataArrays have coordinates that do not conflict with
    +            this object, then these coordinates will be attached.
    +            In general, each array's data will be a view of the array's data
    +            in this DataArray, unless vectorized indexing was triggered by using
    +            an array indexer, in which case the data will be a copy.
    +
             See Also
             --------
             Dataset.sel
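
For readers skimming the patch, here is a small runnable illustration of the behaviour the extended ``sel`` docstring describes (the dimension names, coordinate values, and tolerances below are made up for the example):

```python
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.arange(48).reshape(4, 12),
    dims=("space", "time"),
    coords={
        "space": [10, 20, 30, 40],
        "time": pd.date_range("2000-01-01", periods=12, freq="MS"),
    },
)

da.sel(space=20)                                  # select by label, not position
da.sel(time="2000-01")                            # pandas-style datetime shortcut
da.sel(space=slice(10, 30))                       # label slices include both endpoints
da.sel(space=23, method="nearest", tolerance=5)   # inexact matching
```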
    
    From e8bd8665e8fd762031c2d9c87987d21e113e41cc Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Fri, 12 Jun 2020 15:04:18 +0000
    Subject: [PATCH 050/342] Recommend installing cftime when time decoding fails.
     (#4134)
    
    ---
     xarray/coding/times.py | 5 +++--
     1 file changed, 3 insertions(+), 2 deletions(-)
    
    diff --git a/xarray/coding/times.py b/xarray/coding/times.py
    index d923f1ad088..dafa8ca03b1 100644
    --- a/xarray/coding/times.py
    +++ b/xarray/coding/times.py
    @@ -80,8 +80,9 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime):
                 "the default calendar" if calendar is None else "calendar %r" % calendar
             )
             msg = (
    -            "unable to decode time units %r with %s. Try "
    -            "opening your dataset with decode_times=False." % (units, calendar_msg)
    +            f"unable to decode time units {units!r} with {calendar_msg!r}. Try "
    +            "opening your dataset with decode_times=False or installing cftime "
    +            "if it is not installed."
             )
             raise ValueError(msg)
         else:
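
As a hedged illustration of the workaround the new message points to (``example.nc`` is a placeholder file name, not something shipped with xarray):

```python
import xarray as xr

# If time decoding fails because the calendar is non-standard and cftime is not
# installed, the dataset can still be opened with the raw numeric time values:
ds = xr.open_dataset("example.nc", decode_times=False)

# After installing cftime (e.g. `conda install -c conda-forge cftime`), decoding
# such files works again, optionally forcing cftime objects:
# ds = xr.open_dataset("example.nc", use_cftime=True)
```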
    
    From e26b80f3b813d84520eef4d371a2609fd09182e3 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 13 Jun 2020 19:52:45 +0200
    Subject: [PATCH 051/342] built-in accessor documentation (#3988)
    
* add a property-like descriptor that works both on objects and classes
    
    * generate documentation for the plotting accessor methods
    
    * add a docstring to the custom property-like descriptor
    
    * use the accessor syntax in the main plotting section
    
    * explain why we need a custom property class
    
    * rename the custom property to UncachedAccessor
    
    to match the behavior of _CachedAccessor, it also accepts the
    accessor class (not the object). We lose the ability for custom
    docstrings, though.
    
    * declare that __call__ wraps plot
    
    * add accessor tests
    
    * add the autosummary templates from pandas
    
    * update the plotting section to use the accessor templates
    
    * remove the separate callable section
    
    * fix the import order
    
    * add the DataArray.str accessor as a new subsection
    
    * add the datetime accessor to the main api page
    
    * move the plotting functions into the DataArray / Dataset sections
    
    * remove the documentation of the accessor class itself
    
    * manually copy the docstring since functools.wraps does more than that
    
    * also copy the annotations and mark __call__ as wrapping plot
    
    * re-enable __slots__
    
    * update whats-new.rst
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/_templates/autosummary/accessor.rst       |   6 +
     .../autosummary/accessor_attribute.rst        |   6 +
     .../autosummary/accessor_callable.rst         |   6 +
     .../autosummary/accessor_method.rst           |   6 +
     doc/api.rst                                   | 165 ++++++++++++++++--
     doc/conf.py                                   | 116 ++++++++++++
     doc/whats-new.rst                             |   2 +
     xarray/core/dataarray.py                      |  23 +--
     xarray/core/dataset.py                        |  11 +-
     xarray/core/utils.py                          |  18 ++
     xarray/plot/plot.py                           |   5 +
     xarray/tests/test_plot.py                     |  12 ++
     12 files changed, 327 insertions(+), 49 deletions(-)
     create mode 100644 doc/_templates/autosummary/accessor.rst
     create mode 100644 doc/_templates/autosummary/accessor_attribute.rst
     create mode 100644 doc/_templates/autosummary/accessor_callable.rst
     create mode 100644 doc/_templates/autosummary/accessor_method.rst
    
    diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst
    new file mode 100644
    index 00000000000..4ba745cd6fd
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst
    new file mode 100644
    index 00000000000..b5ad65d6a73
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_attribute.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst
    new file mode 100644
    index 00000000000..7a3301814f5
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_callable.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__
    diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst
    new file mode 100644
    index 00000000000..aefbba6ef1b
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_method.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/api.rst b/doc/api.rst
    index 3f25ac1a070..bb0edd0dfa5 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -233,6 +233,15 @@ Reshaping and reorganizing
        Dataset.sortby
        Dataset.broadcast_like
     
    +Plotting
    +--------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   Dataset.plot.scatter
    +
     DataArray
     =========
     
    @@ -403,6 +412,122 @@ Computation
     :py:attr:`~core.groupby.DataArrayGroupBy.where`
     :py:attr:`~core.groupby.DataArrayGroupBy.quantile`
     
    +
    +String manipulation
    +-------------------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.str.capitalize
    +   DataArray.str.center
    +   DataArray.str.contains
    +   DataArray.str.count
    +   DataArray.str.decode
    +   DataArray.str.encode
    +   DataArray.str.endswith
    +   DataArray.str.find
    +   DataArray.str.get
    +   DataArray.str.index
    +   DataArray.str.isalnum
    +   DataArray.str.isalpha
    +   DataArray.str.isdecimal
    +   DataArray.str.isdigit
    +   DataArray.str.isnumeric
    +   DataArray.str.isspace
    +   DataArray.str.istitle
    +   DataArray.str.isupper
    +   DataArray.str.len
    +   DataArray.str.ljust
    +   DataArray.str.lower
    +   DataArray.str.lstrip
    +   DataArray.str.match
    +   DataArray.str.pad
    +   DataArray.str.repeat
    +   DataArray.str.replace
    +   DataArray.str.rfind
    +   DataArray.str.rindex
    +   DataArray.str.rjust
    +   DataArray.str.rstrip
    +   DataArray.str.slice
    +   DataArray.str.slice_replace
    +   DataArray.str.startswith
    +   DataArray.str.strip
    +   DataArray.str.swapcase
    +   DataArray.str.title
    +   DataArray.str.translate
    +   DataArray.str.upper
    +   DataArray.str.wrap
    +   DataArray.str.zfill
    +
    +Datetimelike properties
    +-----------------------
    +
    +**Datetime properties**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_attribute.rst
    +
    +   DataArray.dt.year
    +   DataArray.dt.month
    +   DataArray.dt.day
    +   DataArray.dt.hour
    +   DataArray.dt.minute
    +   DataArray.dt.second
    +   DataArray.dt.microsecond
    +   DataArray.dt.nanosecond
    +   DataArray.dt.weekofyear
    +   DataArray.dt.week
    +   DataArray.dt.dayofweek
    +   DataArray.dt.weekday
    +   DataArray.dt.weekday_name
    +   DataArray.dt.dayofyear
    +   DataArray.dt.quarter
    +   DataArray.dt.days_in_month
    +   DataArray.dt.daysinmonth
    +   DataArray.dt.season
    +   DataArray.dt.time
    +   DataArray.dt.is_month_start
    +   DataArray.dt.is_month_end
    +   DataArray.dt.is_quarter_end
    +   DataArray.dt.is_year_start
    +   DataArray.dt.is_leap_year
    +
    +**Datetime methods**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.dt.floor
    +   DataArray.dt.ceil
    +   DataArray.dt.round
    +   DataArray.dt.strftime
    +
    +**Timedelta properties**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_attribute.rst
    +
    +   DataArray.dt.days
    +   DataArray.dt.seconds
    +   DataArray.dt.microseconds
    +   DataArray.dt.nanoseconds
    +
    +**Timedelta methods**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.dt.floor
    +   DataArray.dt.ceil
    +   DataArray.dt.round
    +
    +
     Reshaping and reorganizing
     --------------------------
     
    @@ -419,6 +544,27 @@ Reshaping and reorganizing
        DataArray.sortby
        DataArray.broadcast_like
     
    +Plotting
    +--------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_callable.rst
    +
    +   DataArray.plot
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.plot.contourf
    +   DataArray.plot.contour
    +   DataArray.plot.hist
    +   DataArray.plot.imshow
    +   DataArray.plot.line
    +   DataArray.plot.pcolormesh
    +   DataArray.plot.step
    +
     .. _api.ufuncs:
     
     Universal functions
    @@ -664,25 +810,6 @@ Creating custom indexes
     
        cftime_range
     
    -Plotting
    -========
    -
    -.. autosummary::
    -   :toctree: generated/
    -
    -   Dataset.plot
    -   plot.scatter
    -   DataArray.plot
    -   plot.plot
    -   plot.contourf
    -   plot.contour
    -   plot.hist
    -   plot.imshow
    -   plot.line
    -   plot.pcolormesh
    -   plot.step
    -   plot.FacetGrid
    -
     Faceting
     --------
     .. autosummary::
    diff --git a/doc/conf.py b/doc/conf.py
    index 6b16468d29e..d3d126cb33f 100644
    --- a/doc/conf.py
    +++ b/doc/conf.py
    @@ -20,6 +20,12 @@
     import sys
     from contextlib import suppress
     
    +# --------- autosummary templates ------------------
    +# TODO: eventually replace this with a sphinx.ext.auto_accessor module
    +import sphinx
    +from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter
    +from sphinx.util import rpartition
    +
     # make sure the source version is preferred (#3567)
     root = pathlib.Path(__file__).absolute().parent.parent
     os.environ["PYTHONPATH"] = str(root)
    @@ -358,3 +364,113 @@
         "dask": ("https://docs.dask.org/en/latest", None),
         "cftime": ("https://unidata.github.io/cftime", None),
     }
    +
    +
    +# --------- autosummary templates ------------------
    +# TODO: eventually replace this with a sphinx.ext.auto_accessor module
    +class AccessorDocumenter(MethodDocumenter):
    +    """
    +    Specialized Documenter subclass for accessors.
    +    """
    +
    +    objtype = "accessor"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter so this is not chosen for normal methods
    +    priority = 0.6
    +
    +    def format_signature(self):
    +        # this method gives an error/warning for the accessors, therefore
    +        # overriding it (accessor has no arguments)
    +        return ""
    +
    +
    +class AccessorLevelDocumenter(Documenter):
    +    """
    +    Specialized Documenter subclass for objects on accessor level (methods,
    +    attributes).
    +    """
    +
    +    # This is the simple straightforward version
    +    # modname is None, base the last elements (eg 'hour')
    +    # and path the part before (eg 'Series.dt')
    +    # def resolve_name(self, modname, parents, path, base):
    +    #     modname = 'pandas'
    +    #     mod_cls = path.rstrip('.')
    +    #     mod_cls = mod_cls.split('.')
    +    #
    +    #     return modname, mod_cls + [base]
    +
    +    def resolve_name(self, modname, parents, path, base):
    +        if modname is None:
    +            if path:
    +                mod_cls = path.rstrip(".")
    +            else:
    +                mod_cls = None
    +                # if documenting a class-level object without path,
    +                # there must be a current class, either from a parent
    +                # auto directive ...
    +                mod_cls = self.env.temp_data.get("autodoc:class")
    +                # ... or from a class directive
    +                if mod_cls is None:
    +                    mod_cls = self.env.temp_data.get("py:class")
    +                # ... if still None, there's no way to know
    +                if mod_cls is None:
    +                    return None, []
    +            # HACK: this is added in comparison to ClassLevelDocumenter
    +            # mod_cls still exists of class.accessor, so an extra
    +            # rpartition is needed
    +            modname, accessor = rpartition(mod_cls, ".")
    +            modname, cls = rpartition(modname, ".")
    +            parents = [cls, accessor]
    +            # if the module name is still missing, get it like above
    +            if not modname:
    +                modname = self.env.temp_data.get("autodoc:module")
    +            if not modname:
    +                if sphinx.__version__ > "1.3":
    +                    modname = self.env.ref_context.get("py:module")
    +                else:
    +                    modname = self.env.temp_data.get("py:module")
    +            # ... else, it stays None, which means invalid
    +        return modname, parents + [base]
    +
    +
    +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter):
    +
    +    objtype = "accessorattribute"
    +    directivetype = "attribute"
    +
    +    # lower than AttributeDocumenter so this is not chosen for normal attributes
    +    priority = 0.6
    +
    +
    +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter):
    +
    +    objtype = "accessormethod"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter so this is not chosen for normal methods
    +    priority = 0.6
    +
    +
    +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter):
    +    """
+    This documenter lets us remove .__call__ from the method signature for
    +    callable accessors like Series.plot
    +    """
    +
    +    objtype = "accessorcallable"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter; otherwise the doc build prints warnings
    +    priority = 0.5
    +
    +    def format_name(self):
    +        return MethodDocumenter.format_name(self).rstrip(".__call__")
    +
    +
    +def setup(app):
    +    app.add_autodocumenter(AccessorDocumenter)
    +    app.add_autodocumenter(AccessorAttributeDocumenter)
    +    app.add_autodocumenter(AccessorMethodDocumenter)
    +    app.add_autodocumenter(AccessorCallableDocumenter)
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 68b2d738073..dade282d49a 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -174,6 +174,8 @@ Documentation
       By `Justus Magin `_.
     - Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`.
       By `Deepak Cherian `_.
    +- Document ``.plot``, ``.dt``, ``.str`` accessors the way they are called. (:issue:`3625`, :pull:`3988`)
    +  By `Justus Magin `_.
     - Add documentation for the parameters and return values of :py:meth:`DataArray.sel`.
       By `Justus Magin `_.
     
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 44773e36e30..5814c828663 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -260,7 +260,7 @@ class DataArray(AbstractArray, DataWithCoords):
         _resample_cls = resample.DataArrayResample
         _weighted_cls = weighted.DataArrayWeighted
     
    -    dt = property(CombinedDatetimelikeAccessor)
    +    dt = utils.UncachedAccessor(CombinedDatetimelikeAccessor)
     
         def __init__(
             self,
    @@ -2722,24 +2722,7 @@ def func(self, other):
         def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None:
             self.attrs = other.attrs
     
    -    @property
    -    def plot(self) -> _PlotMethods:
    -        """
    -        Access plotting functions for DataArray's
    -
    -        >>> d = xr.DataArray([[1, 2], [3, 4]])
    -
    -        For convenience just call this directly
    -
    -        >>> d.plot()
    -
    -        Or use it as a namespace to use xarray.plot functions as
    -        DataArray methods
    -
    -        >>> d.plot.imshow()  # equivalent to xarray.plot.imshow(d)
    -
    -        """
    -        return _PlotMethods(self)
    +    plot = utils.UncachedAccessor(_PlotMethods)
     
         def _title_for_slice(self, truncate: int = 50) -> str:
             """
    @@ -3831,7 +3814,7 @@ def idxmax(
     
         # this needs to be at the end, or mypy will confuse with `str`
         # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
    -    str = property(StringAccessor)
    +    str = utils.UncachedAccessor(StringAccessor)
     
     
 # priority must be higher than Variable to properly work with binary ufuncs
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index a8011afd3e3..2958cad89b2 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5563,16 +5563,7 @@ def real(self):
         def imag(self):
             return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
     
    -    @property
    -    def plot(self):
    -        """
    -        Access plotting functions for Datasets.
    -        Use it as a namespace to use xarray.plot functions as Dataset methods
    -
    -        >>> ds.plot.scatter(...)  # equivalent to xarray.plot.scatter(ds,...)
    -
    -        """
    -        return _Dataset_PlotMethods(self)
    +    plot = utils.UncachedAccessor(_Dataset_PlotMethods)
     
         def filter_by_attrs(self, **kwargs):
             """Returns a ``Dataset`` with variables that match specific conditions.
    diff --git a/xarray/core/utils.py b/xarray/core/utils.py
    index 1126cf3037f..0542f850b02 100644
    --- a/xarray/core/utils.py
    +++ b/xarray/core/utils.py
    @@ -787,6 +787,24 @@ def drop_dims_from_indexers(
             )
     
     
    +class UncachedAccessor:
    +    """ Acts like a property, but on both classes and class instances
    +
    +    This class is necessary because some tools (e.g. pydoc and sphinx)
    +    inspect classes for which property returns itself and not the
    +    accessor.
    +    """
    +
    +    def __init__(self, accessor):
    +        self._accessor = accessor
    +
    +    def __get__(self, obj, cls):
    +        if obj is None:
    +            return self._accessor
    +
    +        return self._accessor(obj)
    +
    +
     # Singleton type, as per https://github.com/python/typing/pull/240
     class Default(Enum):
         token = 0
    diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
    index 19a3f1e63e3..e4a981daf8c 100644
    --- a/xarray/plot/plot.py
    +++ b/xarray/plot/plot.py
    @@ -445,6 +445,11 @@ def __init__(self, darray):
         def __call__(self, **kwargs):
             return plot(self._da, **kwargs)
     
    +    # we can't use functools.wraps here since that also modifies the name / qualname
    +    __doc__ = __call__.__doc__ = plot.__doc__
    +    __call__.__wrapped__ = plot  # type: ignore
    +    __call__.__annotations__ = plot.__annotations__
    +
         @functools.wraps(hist)
         def hist(self, ax=None, **kwargs):
             return hist(self._da, ax=ax, **kwargs)
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index 6497987e813..c26d105a713 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -111,6 +111,12 @@ class TestPlot(PlotTestCase):
         def setup_array(self):
             self.darray = DataArray(easy_array((2, 3, 4)))
     
    +    def test_accessor(self):
    +        from ..plot.plot import _PlotMethods
    +
    +        assert DataArray.plot is _PlotMethods
    +        assert isinstance(self.darray.plot, _PlotMethods)
    +
         def test_label_from_attrs(self):
             da = self.darray.copy()
             assert "" == label_from_attrs(da)
    @@ -2098,6 +2104,12 @@ def setUp(self):
             ds.B.attrs["units"] = "Bunits"
             self.ds = ds
     
    +    def test_accessor(self):
    +        from ..plot.dataset_plot import _Dataset_PlotMethods
    +
    +        assert Dataset.plot is _Dataset_PlotMethods
    +        assert isinstance(self.ds.plot, _Dataset_PlotMethods)
    +
         @pytest.mark.parametrize(
             "add_guide, hue_style, legend, colorbar",
             [
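
To spell out why the plain ``property`` needed replacing, here is a toy, self-contained restatement of the descriptor added in ``xarray/core/utils.py`` above (``_ToyAccessor`` and the two wrapper classes are invented for the demonstration):

```python
class _ToyAccessor:
    """Stand-in for something like _PlotMethods."""

    def __init__(self, obj):
        self._obj = obj


class UncachedAccessor:
    """Property-like on instances, but returns the accessor class itself
    when accessed on the class (which is what sphinx and pydoc do)."""

    def __init__(self, accessor):
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:  # class-level access
            return self._accessor
        return self._accessor(obj)  # instance-level access builds the accessor


class WithProperty:
    toy = property(_ToyAccessor)


class WithDescriptor:
    toy = UncachedAccessor(_ToyAccessor)


print(WithProperty.toy)        # <property object ...>: doc tools never see _ToyAccessor
print(WithDescriptor.toy)      # <class '_ToyAccessor'>: the docstring stays discoverable
print(type(WithDescriptor().toy))  # <class '_ToyAccessor'>, built from the instance
```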
    
    From 2ba530026fb273a2882869a6e09ede053a0f081b Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 13 Jun 2020 19:53:02 +0200
Subject: [PATCH 052/342] provide an error summary for assert_allclose (#3847)
    
    * allow passing a callable as compat to diff_{dataset,array}_repr
    
    * rewrite assert_allclose to provide a failure summary
    
    * make sure we're comparing variables
    
    * remove spurious comments
    
    * override test_aggregate_complex with a test compatible with pint
    
    * expect the asserts to raise
    
    * xfail the tests failing due to isclose not accepting non-quantity tolerances
    
    * mark top-level function tests as xfailing if they use assert_allclose
    
    * mark test_1d_math as runnable but xfail it
    
    * bump dask and distributed
    
    * entry to whats-new.rst
    
    * attempt to fix the failing py36-min-all-deps and py36-min-nep18 CI
    
    * conditionally xfail tests using assert_allclose with pint < 0.12
    
    * xfail more tests depending on which pint version is used
    
    * try using numpy.testing.assert_allclose instead
    
    * try computing if the dask version is too old and dask.array[bool]
    
    * fix the dask version checking
    
* convert all dask arrays to numpy when using an insufficient dask version
    ---
     ci/requirements/py36-min-all-deps.yml |  4 +-
     ci/requirements/py36-min-nep18.yml    |  4 +-
     doc/whats-new.rst                     |  2 +
     xarray/core/duck_array_ops.py         | 20 +++++++++
     xarray/core/formatting.py             | 16 ++++++-
     xarray/testing.py                     | 43 +++++++++++--------
     xarray/tests/test_duck_array_ops.py   |  2 +-
     xarray/tests/test_testing.py          | 25 +++++++++++
     xarray/tests/test_units.py            | 62 ++++++++++++++++++++++++++-
     9 files changed, 150 insertions(+), 28 deletions(-)
    
    diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
    index 86540197dcc..a72cd000680 100644
    --- a/ci/requirements/py36-min-all-deps.yml
    +++ b/ci/requirements/py36-min-all-deps.yml
    @@ -15,8 +15,8 @@ dependencies:
       - cfgrib=0.9
       - cftime=1.0
       - coveralls
    -  - dask=2.2
    -  - distributed=2.2
    +  - dask=2.5
    +  - distributed=2.5
       - flake8
       - h5netcdf=0.7
       - h5py=2.9  # Policy allows for 2.10, but it's a conflict-fest
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index a5eded49cd4..a2245e89b41 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -6,8 +6,8 @@ dependencies:
       # require drastically newer packages than everything else
       - python=3.6
       - coveralls
    -  - dask=2.4
    -  - distributed=2.4
    +  - dask=2.5
    +  - distributed=2.5
       - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - numpy=1.17
       - pandas=0.25
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index dade282d49a..bcff60ce4df 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -252,6 +252,8 @@ New Features
       :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile`
       (:issue:`3843`, :pull:`3844`)
       By `Aaron Spring `_.
    +- Add a diff summary for `testing.assert_allclose`. (:issue:`3617`, :pull:`3847`)
    +  By `Justus Magin `_.
     
     Bug fixes
     ~~~~~~~~~
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index 1340b456cf2..76719699168 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -6,6 +6,7 @@
     import contextlib
     import inspect
     import warnings
    +from distutils.version import LooseVersion
     from functools import partial
     
     import numpy as np
    @@ -20,6 +21,14 @@
     except ImportError:
         dask_array = None  # type: ignore
     
    +# TODO: remove after we stop supporting dask < 2.9.1
    +try:
    +    import dask
    +
    +    dask_version = dask.__version__
    +except ImportError:
    +    dask_version = None
    +
     
     def _dask_or_eager_func(
         name,
    @@ -199,8 +208,19 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
         """
         arr1 = asarray(arr1)
         arr2 = asarray(arr2)
    +
         lazy_equiv = lazy_array_equiv(arr1, arr2)
         if lazy_equiv is None:
    +        # TODO: remove after we require dask >= 2.9.1
    +        sufficient_dask_version = (
    +            dask_version is not None and LooseVersion(dask_version) >= "2.9.1"
    +        )
    +        if not sufficient_dask_version and any(
    +            isinstance(arr, dask_array_type) for arr in [arr1, arr2]
    +        ):
    +            arr1 = np.array(arr1)
    +            arr2 = np.array(arr2)
    +
             return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
         else:
             return lazy_equiv
    diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
    index d6732fc182e..bd9576a4440 100644
    --- a/xarray/core/formatting.py
    +++ b/xarray/core/formatting.py
    @@ -539,7 +539,10 @@ def extra_items_repr(extra_keys, mapping, ab_side):
         for k in a_keys & b_keys:
             try:
                 # compare xarray variable
    -            compatible = getattr(a_mapping[k], compat)(b_mapping[k])
    +            if not callable(compat):
    +                compatible = getattr(a_mapping[k], compat)(b_mapping[k])
    +            else:
    +                compatible = compat(a_mapping[k], b_mapping[k])
                 is_variable = True
             except AttributeError:
                 # compare attribute value
    @@ -596,8 +599,13 @@ def extra_items_repr(extra_keys, mapping, ab_side):
     
     
     def _compat_to_str(compat):
    +    if callable(compat):
    +        compat = compat.__name__
    +
         if compat == "equals":
             return "equal"
    +    elif compat == "allclose":
    +        return "close"
         else:
             return compat
     
    @@ -611,8 +619,12 @@ def diff_array_repr(a, b, compat):
         ]
     
         summary.append(diff_dim_summary(a, b))
    +    if callable(compat):
    +        equiv = compat
    +    else:
    +        equiv = array_equiv
     
    -    if not array_equiv(a.data, b.data):
    +    if not equiv(a.data, b.data):
             temp = [wrap_indent(short_numpy_repr(obj), start="    ") for obj in (a, b)]
             diff_data_repr = [
                 ab_side + "\n" + ab_data_repr
    diff --git a/xarray/testing.py b/xarray/testing.py
    index e7bf5f9221a..9681503414e 100644
    --- a/xarray/testing.py
    +++ b/xarray/testing.py
    @@ -1,10 +1,11 @@
     """Testing functions exposed to the user API"""
    +import functools
     from typing import Hashable, Set, Union
     
     import numpy as np
     import pandas as pd
     
    -from xarray.core import duck_array_ops, formatting
    +from xarray.core import duck_array_ops, formatting, utils
     from xarray.core.dataarray import DataArray
     from xarray.core.dataset import Dataset
     from xarray.core.indexes import default_indexes
    @@ -118,27 +119,31 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
         """
         __tracebackhide__ = True
         assert type(a) == type(b)
    -    kwargs = dict(rtol=rtol, atol=atol, decode_bytes=decode_bytes)
    +
    +    equiv = functools.partial(
    +        _data_allclose_or_equiv, rtol=rtol, atol=atol, decode_bytes=decode_bytes
    +    )
    +    equiv.__name__ = "allclose"
    +
    +    def compat_variable(a, b):
    +        a = getattr(a, "variable", a)
    +        b = getattr(b, "variable", b)
    +
    +        return a.dims == b.dims and (a._data is b._data or equiv(a.data, b.data))
    +
         if isinstance(a, Variable):
    -        assert a.dims == b.dims
    -        allclose = _data_allclose_or_equiv(a.values, b.values, **kwargs)
    -        assert allclose, f"{a.values}\n{b.values}"
    +        allclose = compat_variable(a, b)
    +        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
         elif isinstance(a, DataArray):
    -        assert_allclose(a.variable, b.variable, **kwargs)
    -        assert set(a.coords) == set(b.coords)
    -        for v in a.coords.variables:
    -            # can't recurse with this function as coord is sometimes a
    -            # DataArray, so call into _data_allclose_or_equiv directly
    -            allclose = _data_allclose_or_equiv(
    -                a.coords[v].values, b.coords[v].values, **kwargs
    -            )
    -            assert allclose, "{}\n{}".format(a.coords[v].values, b.coords[v].values)
    +        allclose = utils.dict_equiv(
    +            a.coords, b.coords, compat=compat_variable
    +        ) and compat_variable(a.variable, b.variable)
    +        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
         elif isinstance(a, Dataset):
    -        assert set(a.data_vars) == set(b.data_vars)
    -        assert set(a.coords) == set(b.coords)
    -        for k in list(a.variables) + list(a.coords):
    -            assert_allclose(a[k], b[k], **kwargs)
    -
    +        allclose = a._coord_names == b._coord_names and utils.dict_equiv(
    +            a.variables, b.variables, compat=compat_variable
    +        )
    +        assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
         else:
             raise TypeError("{} not supported by assertion comparison".format(type(a)))
     
    diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
    index e61881cfce3..feedcd27164 100644
    --- a/xarray/tests/test_duck_array_ops.py
    +++ b/xarray/tests/test_duck_array_ops.py
    @@ -384,7 +384,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
     
                     actual = getattr(da, func)(skipna=skipna, dim=aggdim)
                     assert_dask_array(actual, dask)
    -                assert np.allclose(
    +                np.testing.assert_allclose(
                         actual.values, np.array(expected), rtol=1.0e-4, equal_nan=True
                     )
                 except (TypeError, AttributeError, ZeroDivisionError):
    diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py
    index 041b7341ade..f4961af58e9 100644
    --- a/xarray/tests/test_testing.py
    +++ b/xarray/tests/test_testing.py
    @@ -1,3 +1,5 @@
    +import pytest
    +
     import xarray as xr
     
     
    @@ -5,3 +7,26 @@ def test_allclose_regression():
         x = xr.DataArray(1.01)
         y = xr.DataArray(1.02)
         xr.testing.assert_allclose(x, y, atol=0.01)
    +
    +
    +@pytest.mark.parametrize(
    +    "obj1,obj2",
    +    (
    +        pytest.param(
    +            xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable",
    +        ),
    +        pytest.param(
    +            xr.DataArray([1e-17, 2], dims="x"),
    +            xr.DataArray([0, 3], dims="x"),
    +            id="DataArray",
    +        ),
    +        pytest.param(
    +            xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}),
    +            xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}),
    +            id="Dataset",
    +        ),
    +    ),
    +)
    +def test_assert_allclose(obj1, obj2):
    +    with pytest.raises(AssertionError):
    +        xr.testing.assert_allclose(obj1, obj2)
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index 5dd4a42cff0..6f4f9f768d9 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -425,6 +425,10 @@ def test_apply_ufunc_dataset(dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -512,6 +516,10 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         assert_allclose(expected_b, actual_b)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -929,6 +937,10 @@ def test_concat_dataset(variant, unit, error, dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1036,6 +1048,10 @@ def test_merge_dataarray(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1385,7 +1401,6 @@ def wrapper(cls):
         "test_datetime64_conversion",
         "test_timedelta64_conversion",
         "test_pandas_period_index",
    -    "test_1d_math",
         "test_1d_reduce",
         "test_array_interface",
         "test___array__",
    @@ -1413,6 +1428,13 @@ def example_1d_objects(self):
             ]:
                 yield (self.cls("x", data), data)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
    +    def test_real_and_imag(self):
    +        super().test_real_and_imag()
    +
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1450,6 +1472,22 @@ def test_aggregation(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
    +    def test_aggregate_complex(self):
    +        variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m)
    +        expected = xr.Variable((), (0.5 + 1j) * unit_registry.m)
    +        actual = variable.mean()
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
    +
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1748,6 +1786,10 @@ def test_isel(self, indices, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -2224,6 +2266,10 @@ def test_repr(self, func, variant, dtype):
             # warnings or errors, but does not check the result
             func(data_array)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose",
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -2235,7 +2281,7 @@ def test_repr(self, func, variant, dtype):
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(
    +                marks=pytest.mark.skip(
                         reason="median does not work with dataarrays yet"
                     ),
                 ),
    @@ -3283,6 +3329,10 @@ def test_head_tail_thin(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3356,6 +3406,10 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3558,6 +3612,10 @@ def test_computation(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
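
The practical effect of the rewrite is easiest to see with a failing comparison; a minimal, runnable example (the arrays mirror the new ``test_assert_allclose`` parameters above):

```python
import xarray as xr

a = xr.DataArray([1e-17, 2.0], dims="x")
b = xr.DataArray([0.0, 3.0], dims="x")

try:
    xr.testing.assert_allclose(a, b)
except AssertionError as error:
    # with this patch the message is a diff-style summary produced by
    # formatting.diff_array_repr, rather than two raw value dumps
    print(error)
```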
    
    From bc5c79e5f79d8d7fbb1ed593a5413028a1bdfb36 Mon Sep 17 00:00:00 2001
    From: Noah D Brenowitz 
    Date: Mon, 15 Jun 2020 04:25:52 -0700
    Subject: [PATCH 053/342] Improve typehints of xr.Dataset.__getitem__ (#4144)
    
    * Improve typehints of xr.Dataset.__getitem__
    
    Resolves #4125
    
    * Add overload for Mapping behavior
    
    Sadly this is not working with my version of mypy. See https://github.com/python/mypy/issues/7328
    
    * Overload only Hashable inputs
    
    Given mypy's use of overloads, I think this is all we can do. If the argument is not Hashable, then return the Union type as before.
    
    * Lint
    
    * Quote the DataArray to avoid error in py3.6
    
    * Code review
    
    Co-authored-by: crusaderky 
    ---
     .pre-commit-config.yaml  |  2 +-
     ci/requirements/py38.yml |  2 +-
     xarray/core/dataset.py   | 17 +++++++++++++++--
     xarray/core/weighted.py  |  6 +++---
     4 files changed, 20 insertions(+), 7 deletions(-)
    
    diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
    index 26bf4803ef6..1d384e58a3c 100644
    --- a/.pre-commit-config.yaml
    +++ b/.pre-commit-config.yaml
    @@ -16,7 +16,7 @@ repos:
         hooks:
           - id: flake8
       - repo: https://github.com/pre-commit/mirrors-mypy
    -    rev: v0.761  # Must match ci/requirements/*.yml
    +    rev: v0.780  # Must match ci/requirements/*.yml
         hooks:
           - id: mypy
       # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 24602f884e9..7dff3a1bd97 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -22,7 +22,7 @@ dependencies:
       - isort
       - lxml    # Optional dep of pydap
       - matplotlib
    -  - mypy=0.761  # Must match .pre-commit-config.yaml
    +  - mypy=0.780  # Must match .pre-commit-config.yaml
       - nc-time-axis
       - netcdf4
       - numba
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 2958cad89b2..a024324bcb1 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -27,6 +27,7 @@
         TypeVar,
         Union,
         cast,
    +    overload,
     )
     
     import numpy as np
    @@ -1241,13 +1242,25 @@ def loc(self) -> _LocIndexer:
             """
             return _LocIndexer(self)
     
    -    def __getitem__(self, key: Any) -> "Union[DataArray, Dataset]":
    +    # FIXME https://github.com/python/mypy/issues/7328
    +    @overload
    +    def __getitem__(self, key: Mapping) -> "Dataset":  # type: ignore
    +        ...
    +
    +    @overload
    +    def __getitem__(self, key: Hashable) -> "DataArray":  # type: ignore
    +        ...
    +
    +    @overload
    +    def __getitem__(self, key: Any) -> "Dataset":
    +        ...
    +
    +    def __getitem__(self, key):
             """Access variables or coordinates this dataset as a
             :py:class:`~xarray.DataArray`.
     
             Indexing with a list of names will return a new ``Dataset`` object.
             """
    -        # TODO(shoyer): type this properly: https://github.com/python/mypy/issues/7328
             if utils.is_dict_like(key):
                 return self.isel(**cast(Mapping, key))
     
    diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
    index 21ed06ea85f..fa143342c06 100644
    --- a/xarray/core/weighted.py
    +++ b/xarray/core/weighted.py
    @@ -72,11 +72,11 @@ class Weighted:
         def __init__(self, obj: "DataArray", weights: "DataArray") -> None:
             ...
     
    -    @overload  # noqa: F811
    -    def __init__(self, obj: "Dataset", weights: "DataArray") -> None:  # noqa: F811
    +    @overload
    +    def __init__(self, obj: "Dataset", weights: "DataArray") -> None:
             ...
     
    -    def __init__(self, obj, weights):  # noqa: F811
    +    def __init__(self, obj, weights):
             """
             Create a Weighted object
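
To show what the new overloads buy when running mypy, consider the following hedged sketch (the dataset and variable names are invented; runtime behaviour is unchanged):

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [4, 5, 6])})

da = ds["a"]             # Hashable key: mypy can now infer DataArray
subset = ds[["a", "b"]]  # list (unhashable): falls back to the Any overload -> Dataset
sliced = ds[{"x": 0}]    # Mapping: typed as Dataset, dispatched to ds.isel(x=0)

print(type(da).__name__, type(subset).__name__, type(sliced).__name__)
# DataArray Dataset Dataset
```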
     
    
    From 6f272b5693913d4e6a989cbf2e8b18d02a71cb4c Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Mon, 15 Jun 2020 23:35:43 -0700
    Subject: [PATCH 054/342] Fix failing upstream-dev build & remove docs build
     (#4160)
    
We'll use RTD's new doc builder instead. For an example, click on
    "docs/readthedocs.org:xray" below or look at GH4159
    ---
     azure-pipelines.yml  | 18 ------------------
     ci/azure/install.yml |  5 ++++-
     2 files changed, 4 insertions(+), 19 deletions(-)
    
    diff --git a/azure-pipelines.yml b/azure-pipelines.yml
    index ff85501c555..e04c8f74f68 100644
    --- a/azure-pipelines.yml
    +++ b/azure-pipelines.yml
    @@ -108,21 +108,3 @@ jobs:
           python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml
           python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml
         displayName: minimum versions policy
    -
    -- job: Docs
    -  pool:
    -    vmImage: 'ubuntu-16.04'
    -  steps:
    -  - template: ci/azure/install.yml
    -    parameters:
    -      env_file: ci/requirements/doc.yml
    -  - bash: |
    -      source activate xarray-tests
    -      # Replicate the exact environment created by the readthedocs CI
    -      conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme
    -    displayName: Replicate readthedocs CI environment
    -  - bash: |
    -      source activate xarray-tests
    -      cd doc
    -      sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html
    -    displayName: Build HTML docs
    diff --git a/ci/azure/install.yml b/ci/azure/install.yml
    index eff229e863a..83895eebe01 100644
    --- a/ci/azure/install.yml
    +++ b/ci/azure/install.yml
    @@ -10,6 +10,8 @@ steps:
         conda env create -n xarray-tests --file ${{ parameters.env_file }}
       displayName: Install conda dependencies
     
    +# TODO: add sparse back in, once Numba works with the development version of
    +# NumPy again: https://github.com/pydata/xarray/issues/4146 
     - bash: |
         source activate xarray-tests
         conda uninstall -y --force \
    @@ -23,7 +25,8 @@ steps:
             cftime \
             rasterio \
             pint \
    -        bottleneck
    +        bottleneck \
    +        sparse
         python -m pip install \
             -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \
             --no-deps \
    
    From 52bb0a22ed25195a1b47b693f1881c90f15983e6 Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Wed, 17 Jun 2020 09:50:56 -0700
    Subject: [PATCH 055/342] Update issue templates inspired/based on dask (#4154)
    
    * Update issue templates based on dask
    
    * add config.yml for issue template
    ---
     .github/ISSUE_TEMPLATE/bug-report.md      | 39 +++++++++++++++++++++++
     .github/ISSUE_TEMPLATE/bug_report.md      | 35 --------------------
     .github/ISSUE_TEMPLATE/config.yml         |  5 +++
     .github/ISSUE_TEMPLATE/feature-request.md | 22 +++++++++++++
     4 files changed, 66 insertions(+), 35 deletions(-)
     create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md
     delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
     create mode 100644 .github/ISSUE_TEMPLATE/config.yml
     create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md
    
    diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md
    new file mode 100644
    index 00000000000..02bc5d0f7b0
    --- /dev/null
    +++ b/.github/ISSUE_TEMPLATE/bug-report.md
    @@ -0,0 +1,39 @@
    +---
    +name: Bug report
    +about: Create a report to help us improve
    +title: ''
    +labels: ''
    +assignees: ''
    +
    +---
    +
    +
    +
    +**What happened**:
    +
    +**What you expected to happen**:
    +
    +**Minimal Complete Verifiable Example**:
    +
    +```python
    +# Put your MCVE code here
    +```
    +
    +**Anything else we need to know?**:
    +
    +**Environment**:
    +
    +
+<details><summary>Output of <tt>xr.show_versions()</tt></summary>
+
+
+
+
+</details>
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index c712cf27979..00000000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-name: Bug report / Feature request
-about: 'Post a problem or idea'
-title: ''
-labels: ''
-assignees: ''
-
----
-
-
-
-#### MCVE Code Sample
-
-
-```python
-# Your code here
-
-```
-
-#### Expected Output
-
-
-#### Problem Description
-
-
-
-#### Versions
-
-
-<details><summary>Output of <tt>xr.show_versions()</tt></summary>
-
-
-
-
-</details>
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000000..3389fbfe071
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+  - name: General Question
+    url: https://stackoverflow.com/questions/tagged/python-xarray
+    about: "If you have a question like *How do I append to an xarray.Dataset?* then please ask on Stack Overflow using the #python-xarray tag."
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 00000000000..7021fe490aa
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context about the feature request here.

From ad0a76bbf4a88ab03d495ba74423b0daa08d75ec Mon Sep 17 00:00:00 2001
From: Ray Bell 
Date: Wed, 17 Jun 2020 12:52:29 -0400
Subject: [PATCH 056/342] drop eccodes in docs (#4162)

Co-authored-by: Ray Bell 
---
 doc/io.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/doc/io.rst b/doc/io.rst
index 1f854586202..4aac5e0b6f7 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -994,8 +994,8 @@ be done directly from zarr, as described in the
 GRIB format via cfgrib
 ----------------------
 
-xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_
-C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'``
+xarray supports reading GRIB files via ECMWF cfgrib_ python driver,
+if it is installed. To open a GRIB file supply ``engine='cfgrib'``
 to :py:func:`open_dataset`:
 
 .. ipython::
 
     In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib")
 
-We recommend installing ecCodes via conda::
-
-    conda install -c conda-forge eccodes
-    pip install cfgrib
+We recommend installing cfgrib via conda::
+
+    conda install -c conda-forge cfgrib
 
 .. _cfgrib: https://github.com/ecmwf/cfgrib
-.. _ecCodes: https://confluence.ecmwf.int/display/ECC/ecCodes+Home
 
 .. _io.pynio:

From 66e77309ad48a5ad0dbe774c5500bb52775b9372 Mon Sep 17 00:00:00 2001
From: keewis 
Date: Wed, 17 Jun 2020 22:40:07 +0200
Subject: [PATCH 057/342] pint support for Dataset (#3975)

* remove the xfail marks from all aggregations except prod and np.median

* rewrite the aggregation tests

* rewrite the repr tests

it still does not check the content of the repr, though

* rewrite some more tests

* simplify the numpy-method-with-args tests

* always use the same data units unless the compatibility is tested

* partially rewrite more tests

* rewrite combine_first

This also adds tests for units in indexes, which are by default stripped.
* simplify the comparisons test a bit

* skip the tests for identical

* remove the map_values function

* only call convert_units if necessary

* use assert_units_equal and assert_equal in broadcast_like and skip it

* remove the conditional skip since pint now supports __array_function__

* only skip the broadcast_like tests if we attempt to put units in indexes

* remove the xfail mark from the where tests

* reimplement the broadcast_equals tests

* reimplement the tests on stacked arrays

* refactor the to_stacked_array tests

this test is marked as skipped because the unit registry always returns
numpy.array objects which are not hashable, so the initial dataset with
units cannot be constructed (the result of to_stacked_array wouldn't be
correct either because IndexVariable doesn't support units)

* fix the stacking and reordering tests

* don't create a coordinate for the isel tests

* separate the tests for units in dims from the tests for units in data

* refactor the dataset constructor tests

* fix the repr tests

* raise on all warnings

* rename merge_mappings to zip_mappings

* rename merge_dicts to merge_mappings

* make the missing value filling tests raise on warnings

* remove a leftover assert_equal_with_units

* refactor the sel tests

* make the loc tests a slightly modified copy of the sel tests

* make the drop_sel tests a slightly modified version of the sel tests

* refactor the head / tail / thin tests

* refactor the squeeze tests to not have multiple tests per case

* skip the head / tail / thin tests with units in dimensions

* combine the interp and reindex tests

* combine the interp_like and reindex_like tests

* refactor the computation tests

* rewrite the computation objects tests

* rewrite the resample tests

* rewrite the grouped operations tests

* rewrite the content manipulation tests

* refactor the merge tests

* remove the old assert_equal_with_units function

* xfail the groupby_bins tests for now

* fix and use allclose

* filterwarnings for the whole TestDataset class

* modify the squeeze tests to not use units in indexes

* replace skip with xfail

* update whats-new.rst

* update the xfail reason for the rolling_exp tests

* temporarily use pip to install pint since the feedstock seems to take a while

* don't use pip to install pint

* update the xfail to require at least 0.12.1

* xfail the prod tests

* filter only UnitStrippedWarning

* remove unncessary commas
---
 ci/requirements/py36-min-nep18.yml |    2 +-
 doc/whats-new.rst                  |    2 +-
 xarray/tests/test_units.py         | 1463 +++++++++++++---------------
 3 files changed, 699 insertions(+), 768 deletions(-)

diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
index a2245e89b41..48b9c057260 100644
--- a/ci/requirements/py36-min-nep18.yml
+++ b/ci/requirements/py36-min-nep18.yml
@@ -11,7 +11,7 @@ dependencies:
   - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
   - numpy=1.17
   - pandas=0.25
-  - pint=0.11
+  - pint
   - pip
   - pytest
   - pytest-cov
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index bcff60ce4df..4b5bb1e491f 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -72,7 +72,7 @@ New Features
 - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
   :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`)
   By `Kai Mühlbauer `_.
-- More support for unit aware arrays with pint (:pull:`3643`)
+- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`)
   By `Justus Magin `_.
- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even without ``append_dim``, as long as dimension sizes do not change. diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 6f4f9f768d9..b477e8cccb2 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -7,9 +7,8 @@ import pytest import xarray as xr -from xarray.core import formatting from xarray.core.npcompat import IS_NEP18_ACTIVE -from xarray.testing import assert_allclose, assert_identical +from xarray.testing import assert_allclose, assert_equal, assert_identical from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects @@ -27,11 +26,6 @@ pytest.mark.skipif( not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled" ), - # TODO: remove this once pint has a released version with __array_function__ - pytest.mark.skipif( - not hasattr(unit_registry.Quantity, "__array_function__"), - reason="pint does not implement __array_function__ yet", - ), # pytest.mark.filterwarnings("ignore:::pint[.*]"), ] @@ -51,10 +45,23 @@ def dimensionality(obj): def compatible_mappings(first, second): return { key: is_compatible(unit1, unit2) - for key, (unit1, unit2) in merge_mappings(first, second) + for key, (unit1, unit2) in zip_mappings(first, second) } +def merge_mappings(base, *mappings): + result = base.copy() + for m in mappings: + result.update(m) + + return result + + +def zip_mappings(*mappings): + for key in set(mappings[0]).intersection(*mappings[1:]): + yield key, tuple(m[key] for m in mappings) + + def array_extract_units(obj): if isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)): obj = obj.data @@ -257,50 +264,11 @@ def assert_units_equal(a, b): assert extract_units(a) == extract_units(b) -def assert_equal_with_units(a, b): - # works like xr.testing.assert_equal, but also explicitly checks units - # so, it is more like assert_identical - __tracebackhide__ = True - - if isinstance(a, xr.Dataset) or isinstance(b, xr.Dataset): - a_units = extract_units(a) - b_units = extract_units(b) - - a_without_units = strip_units(a) - b_without_units = strip_units(b) - - assert a_without_units.equals(b_without_units), formatting.diff_dataset_repr( - a, b, "equals" - ) - assert a_units == b_units - else: - a = a if not isinstance(a, (xr.DataArray, xr.Variable)) else a.data - b = b if not isinstance(b, (xr.DataArray, xr.Variable)) else b.data - - assert type(a) == type(b) or ( - isinstance(a, Quantity) and isinstance(b, Quantity) - ) - - # workaround until pint implements allclose in __array_function__ - if isinstance(a, Quantity) or isinstance(b, Quantity): - assert ( - hasattr(a, "magnitude") and hasattr(b, "magnitude") - ) and np.allclose(a.magnitude, b.magnitude, equal_nan=True) - assert (hasattr(a, "units") and hasattr(b, "units")) and a.units == b.units - else: - assert np.allclose(a, b, equal_nan=True) - - @pytest.fixture(params=[float, int]) def dtype(request): return request.param -def merge_mappings(*mappings): - for key in set(mappings[0]).intersection(*mappings[1:]): - yield key, tuple(m[key] for m in mappings) - - def merge_args(default_args, new_args): from itertools import zip_longest @@ -427,7 +395,7 @@ def test_apply_ufunc_dataset(dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -518,7 +486,7 @@ def 
test_align_dataarray(fill_value, variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -939,7 +907,7 @@ def test_concat_dataset(variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1050,7 +1018,7 @@ def test_merge_dataarray(variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1430,7 +1398,7 @@ def example_1d_objects(self): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) def test_real_and_imag(self): super().test_real_and_imag() @@ -1474,7 +1442,7 @@ def test_aggregation(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) def test_aggregate_complex(self): variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m) @@ -1486,7 +1454,7 @@ def test_aggregate_complex(self): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "func", @@ -1788,7 +1756,7 @@ def test_isel(self, indices, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1928,7 +1896,7 @@ def test_squeeze(self, dtype): pytest.param( method("quantile", q=[0.25, 0.75]), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -2268,7 +2236,7 @@ def test_repr(self, func, variant, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose", + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose", ) @pytest.mark.parametrize( "func", @@ -3331,7 +3299,7 @@ def test_head_tail_thin(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( @@ -3408,7 +3376,7 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", 
reason="pint bug in isclose" ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( @@ -3577,7 +3545,7 @@ def test_stacking_reordering(self, func, dtype): pytest.param( method("quantile", q=[0.25, 0.75]), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -3614,7 +3582,7 @@ def test_computation(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "func", @@ -3630,7 +3598,9 @@ def test_computation(self, func, dtype): ), pytest.param( method("rolling_exp", y=3), - marks=pytest.mark.xfail(reason="units not supported by numbagg"), + marks=pytest.mark.xfail( + reason="numbagg functions are not supported by pint" + ), ), ), ids=repr, @@ -3676,7 +3646,7 @@ def test_resample(self, dtype): pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -3711,15 +3681,16 @@ def test_grouped_operations(self, func, dtype): xr.testing.assert_identical(expected, actual) +@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning") class TestDataset: @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param(1, xr.MergeError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + unit_registry.dimensionless, xr.MergeError, id="dimensionless" ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="same_unit"), ), @@ -3728,11 +3699,10 @@ class TestDataset: "shared", ( "nothing", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), pytest.param( - "coords", - marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") ), + "coords", ), ) def test_init(self, shared, unit, error, dtype): @@ -3740,60 +3710,53 @@ def test_init(self, shared, unit, error, dtype): scaled_unit = unit_registry.mm a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa - b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa - - raw_x = np.arange(a.shape[0]) - x = raw_x * original_unit - x2 = x.to(scaled_unit) - - raw_y = np.arange(b.shape[0]) - y = raw_y * unit - y_units = unit if isinstance(y, unit_registry.Quantity) else None - if isinstance(y, unit_registry.Quantity): - if y.check(scaled_unit): - y2 = y.to(scaled_unit) - else: - y2 = y * 1000 - y2_units = y2.units - else: - y2 = y * 1000 - y2_units = None + b = np.linspace(-1, 0, 10).astype(dtype) * unit_registry.degK + + values_a = np.arange(a.shape[0]) + dim_a = values_a * original_unit + coord_a = dim_a.to(scaled_unit) + + values_b = np.arange(b.shape[0]) + dim_b = values_b * unit + coord_b = ( + dim_b.to(scaled_unit) + if unit_registry.is_compatible_with(dim_b, scaled_unit) + and unit != scaled_unit + else dim_b * 1000 + ) variants = { - "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}), - "dims": ( - {"x": x, "x2": ("x", 
strip_units(x2))}, - {"x": y, "y2": ("x", strip_units(y2))}, + "nothing": ({}, {}), + "dims": ({"x": dim_a}, {"x": dim_b}), + "coords": ( + {"x": values_a, "y": ("x", coord_a)}, + {"x": values_b, "y": ("x", coord_b)}, ), - "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}), } coords_a, coords_b = variants.get(shared) dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x") - arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a) - arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + a = xr.DataArray(data=a, coords=coords_a, dims=dims_a) + b = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + if error is not None and shared != "nothing": with pytest.raises(error): - xr.Dataset(data_vars={"a": arr1, "b": arr2}) + xr.Dataset(data_vars={"a": a, "b": b}) return - actual = xr.Dataset(data_vars={"a": arr1, "b": arr2}) + actual = xr.Dataset(data_vars={"a": a, "b": b}) - expected_units = { - "a": a.units, - "b": b.units, - "x": x.units, - "x2": x2.units, - "y": y_units, - "y2": y2_units, - } + units = merge_mappings( + extract_units(a.rename("a")), extract_units(b.rename("b")) + ) expected = attach_units( - xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}), - expected_units, + xr.Dataset(data_vars={"a": strip_units(a), "b": strip_units(b)}), units ) - assert_equal_with_units(actual, expected) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) @@ -3801,48 +3764,45 @@ def test_init(self, shared, unit, error, dtype): @pytest.mark.parametrize( "variant", ( + "data", pytest.param( - "with_dims", + "dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "coords", ), ) - @pytest.mark.filterwarnings("error:::pint[.*]") def test_repr(self, func, variant, dtype): - array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa - array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK + unit1, unit2 = ( + (unit_registry.Pa, unit_registry.degK) if variant == "data" else (1, 1) + ) + + array1 = np.linspace(1, 2, 10, dtype=dtype) * unit1 + array2 = np.linspace(0, 1, 10, dtype=dtype) * unit2 x = np.arange(len(array1)) * unit_registry.s y = x.to(unit_registry.ms) variants = { - "with_dims": {"x": x}, - "with_coords": {"y": ("x", y)}, - "without_coords": {}, + "dims": {"x": x}, + "coords": {"y": ("x", y)}, + "data": {}, } - data_array = xr.Dataset( + ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("x", array2)}, coords=variants.get(variant), ) # FIXME: this just checks that the repr does not raise # warnings or errors, but does not check the result - func(data_array) + func(ds) @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), @@ -3850,28 +3810,19 @@ def test_repr(self, func, variant, dtype): function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail( - reason="np.median does not work with dataset yet" - ), + marks=pytest.mark.xfail(reason="median does not work with dataset yet"), ), function("sum"), pytest.param( function("prod"), - marks=pytest.mark.xfail(reason="not implemented by pint"), + marks=pytest.mark.xfail(reason="prod does 
not work with dataset yet"), ), function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="fails within xarray"), - ), - pytest.param( - method("all"), marks=pytest.mark.xfail(reason="not implemented by pint") - ), - pytest.param( - method("any"), marks=pytest.mark.xfail(reason="not implemented by pint") - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -3881,68 +3832,49 @@ def test_repr(self, func, variant, dtype): method("sum"), pytest.param( method("prod"), - marks=pytest.mark.xfail(reason="not implemented by pint"), + marks=pytest.mark.xfail(reason="prod does not work with dataset yet"), ), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray") - ), + method("cumprod"), ), ids=repr, ) def test_aggregation(self, func, dtype): - unit_a = ( - unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless - ) - unit_b = ( - unit_registry.kg / unit_registry.m ** 3 + unit_a, unit_b = ( + (unit_registry.Pa, unit_registry.degK) if func.name != "cumprod" - else unit_registry.dimensionless - ) - a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x") - b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x") - x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x") - y = xr.DataArray( - data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x" + else (unit_registry.dimensionless, unit_registry.dimensionless) ) - ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y}) + a = np.linspace(0, 1, 10).astype(dtype) * unit_a + b = np.linspace(-1, 0, 10).astype(dtype) * unit_b + + ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) + + units_a = array_extract_units(func(a)) + units_b = array_extract_units(func(b)) + units = {"a": units_a, "b": units_b} actual = func(ds) - expected = attach_units( - func(strip_units(ds)), - { - "a": extract_units(func(a)).get(None), - "b": extract_units(func(b)).get(None), - }, - ) + expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize("property", ("imag", "real")) def test_numpy_properties(self, property, dtype): - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray( - data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x" - ), - "b": xr.DataArray( - data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y" - ), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": np.arange(15) * unit_registry.s, - }, - ) + a = np.linspace(0, 1, 10) * unit_registry.Pa + b = np.linspace(-1, 0, 15) * unit_registry.degK + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) actual = getattr(ds, property) expected = attach_units(getattr(strip_units(ds), property), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -3956,31 +3888,19 @@ def test_numpy_properties(self, property, dtype): ids=repr, ) def test_numpy_methods(self, func, dtype): - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray( - data=np.linspace(1, -1, 10) * unit_registry.Pa, dims="x" - ), - "b": xr.DataArray( - data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y" - ), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": 
np.arange(15) * unit_registry.s, - }, - ) - units = { - "a": array_extract_units(func(ds.a)), - "b": array_extract_units(func(ds.b)), - "x": unit_registry.m, - "y": unit_registry.s, - } + a = np.linspace(1, -1, 10) * unit_registry.Pa + b = np.linspace(-1, 1, 15) * unit_registry.degK + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) + + units_a = array_extract_units(func(a)) + units_b = array_extract_units(func(b)) + units = {"a": units_a, "b": units_b} actual = func(ds) expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) @pytest.mark.parametrize( @@ -3997,21 +3917,13 @@ def test_numpy_methods(self, func, dtype): ) def test_numpy_methods_with_args(self, func, unit, error, dtype): data_unit = unit_registry.m - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"), - "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": np.arange(15) * unit_registry.s, - }, - ) + a = np.linspace(0, 10, 15) * unit_registry.m + b = np.linspace(-2, 12, 20) * unit_registry.m + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) kwargs = { - key: (value * unit if isinstance(value, (int, float)) else value) - for key, value in func.kwargs.items() + key: array_attach_units(value, unit) for key, value in func.kwargs.items() } if error is not None: @@ -4028,7 +3940,8 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): actual = func(ds, **kwargs) expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -4058,22 +3971,13 @@ def test_missing_value_detection(self, func, dtype): * unit_registry.Pa ) - x = np.arange(array1.shape[0]) * unit_registry.m - y = np.arange(array1.shape[1]) * unit_registry.m - z = np.arange(array2.shape[0]) * unit_registry.m - - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("z", "x")), - }, - coords={"x": x, "y": y, "z": z}, - ) + ds = xr.Dataset({"a": (("x", "y"), array1), "b": (("z", "x"), array2)}) expected = func(strip_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose the unit") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -4087,23 +3991,14 @@ def test_missing_value_filling(self, func, dtype): * unit_registry.Pa ) - x = np.arange(len(array1)) - - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("y", array2)}) + units = extract_units(ds) - expected = attach_units( - func(strip_units(ds), dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(func(strip_units(ds), dim="x"), units) actual = func(ds, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -4113,14 +4008,7 @@ def 
test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail( - reason="where converts the array, not the fill value" - ), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4141,30 +4029,26 @@ def test_fillna(self, fill_value, unit, error, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.m ) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - } - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + value = fill_value * unit + units = extract_units(ds) if error is not None: with pytest.raises(error): - ds.fillna(value=fill_value * unit) + ds.fillna(value=value) return - actual = ds.fillna(value=fill_value * unit) + actual = ds.fillna(value=value) expected = attach_units( strip_units(ds).fillna( - value=strip_units( - convert_units(fill_value * unit, {None: unit_registry.m}) - ) + value=strip_units(convert_units(value, {None: unit_registry.m})) ), - {"a": unit_registry.m, "b": unit_registry.m}, + units, ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) def test_dropna(self, dtype): array1 = ( @@ -4175,22 +4059,14 @@ def test_dropna(self, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.Pa ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).dropna(dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(strip_units(ds).dropna(dim="x"), units) actual = ds.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4211,34 +4087,28 @@ def test_isin(self, unit, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.m ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype) values = raw_values * unit - if ( - isinstance(values, unit_registry.Quantity) - and values.check(unit_registry.m) - and unit != unit_registry.m - ): - raw_values = values.to(unit_registry.m).magnitude + converted_values = ( + convert_units(values, {None: unit_registry.m}) + if is_compatible(unit, unit_registry.m) + else values + ) - expected = strip_units(ds).isin(raw_values) - if not isinstance(values, unit_registry.Quantity) or not values.check( - unit_registry.m - ): + expected = strip_units(ds).isin(strip_units(converted_values)) + # TODO: use `unit_registry.is_compatible_with(unit, unit_registry.m)` instead. + # Needs `pint>=0.12.1`, though, so we probably should wait until that is released. 
+ if not is_compatible(unit, unit_registry.m): expected.a[:] = False expected.b[:] = False + actual = ds.isin(values) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") @@ -4260,13 +4130,8 @@ def test_where(self, variant, unit, error, dtype): array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": np.arange(len(array1))}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) condition = ds < 0.5 * original_unit other = np.linspace(-2, -1, 10).astype(dtype) * unit @@ -4288,15 +4153,13 @@ def test_where(self, variant, unit, error, dtype): for key, value in kwargs.items() } - expected = attach_units( - strip_units(ds).where(**kwargs_without_units), - {"a": original_unit, "b": original_unit}, - ) + expected = attach_units(strip_units(ds).where(**kwargs_without_units), units,) actual = ds.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") + @pytest.mark.xfail(reason="interpolate_na uses numpy.vectorize") def test_interpolate_na(self, dtype): array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) @@ -4306,24 +4169,15 @@ def test_interpolate_na(self, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.Pa ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).interpolate_na(dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(strip_units(ds).interpolate_na(dim="x"), units,) actual = ds.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="wrong argument order for `where`") @pytest.mark.parametrize( "unit,error", ( @@ -4336,31 +4190,40 @@ def test_interpolate_na(self, dtype): pytest.param(unit_registry.m, None, id="same_unit"), ), ) - def test_combine_first(self, unit, error, dtype): + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_combine_first(self, variant, unit, error, dtype): + variants = { + "data": (unit_registry.m, unit, 1, 1), + "dims": (1, 1, unit_registry.m, unit), + } + data_unit, other_data_unit, dims_unit, other_dims_unit = variants.get(variant) + array1 = ( - np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) - * unit_registry.m + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * data_unit ) array2 = ( - np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) - * unit_registry.m + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * data_unit ) - x = np.arange(len(array1)) + x = np.arange(len(array1)) * dims_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), 
- }, - coords={"x": x}, + data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}, ) - other_array1 = np.ones_like(array1) * unit - other_array2 = -1 * np.ones_like(array2) * unit + units = extract_units(ds) + + other_array1 = np.ones_like(array1) * other_data_unit + other_array2 = np.full_like(array2, fill_value=-1) * other_data_unit + other_x = (np.arange(array1.shape[0]) + 5) * other_dims_unit other = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=other_array1, dims="x"), - "b": xr.DataArray(data=other_array2, dims="x"), - }, - coords={"x": np.arange(array1.shape[0])}, + data_vars={"a": ("x", other_array1), "b": ("x", other_array2)}, + coords={"x": other_x}, ) if error is not None: @@ -4370,16 +4233,13 @@ def test_combine_first(self, unit, error, dtype): return expected = attach_units( - strip_units(ds).combine_first( - strip_units( - convert_units(other, {"a": unit_registry.m, "b": unit_registry.m}) - ) - ), - {"a": unit_registry.m, "b": unit_registry.m}, + strip_units(ds).combine_first(strip_units(convert_units(other, units))), + units, ) actual = ds.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4392,7 +4252,7 @@ def test_combine_first(self, unit, error, dtype): ), ) @pytest.mark.parametrize( - "variation", + "variant", ( "data", pytest.param( @@ -4401,50 +4261,67 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) - def test_comparisons(self, func, variation, unit, dtype): - def is_compatible(a, b): - a = a if a is not None else 1 - b = b if b is not None else 1 - quantity = np.arange(5) * a - - return a == b or quantity.check(b) - + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip("behaviour of identical is unclear"), + ), + ), + ids=repr, + ) + def test_comparisons(self, func, variant, unit, dtype): array1 = np.linspace(0, 5, 10).astype(dtype) array2 = np.linspace(-5, 0, 10).astype(dtype) coord = np.arange(len(array1)).astype(dtype) - original_unit = unit_registry.m - quantity1 = array1 * original_unit - quantity2 = array2 * original_unit - x = coord * original_unit - y = coord * original_unit + variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.m), + } + data_unit, dim_unit, coord_unit = variants.get(variant) - units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} - data_unit, dim_unit, coord_unit = units.get(variation) + a = array1 * data_unit + b = array2 * data_unit + x = coord * dim_unit + y = coord * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=quantity1, dims="x"), - "b": xr.DataArray(data=quantity2, dims="x"), - }, - coords={"x": x, "y": ("x", y)}, + data_vars={"a": ("x", a), "b": ("x", b)}, coords={"x": x, "y": ("x", y)}, ) + units = extract_units(ds) + + other_variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + other_data_unit, other_dim_unit, other_coord_unit = other_variants.get(variant) other_units = { - "a": data_unit if quantity1.check(data_unit) else None, - "b": data_unit if quantity2.check(data_unit) else None, - "x": dim_unit if x.check(dim_unit) else None, - "y": coord_unit if y.check(coord_unit) else None, + "a": other_data_unit, + "b": other_data_unit, + "x": other_dim_unit, + "y": 
other_coord_unit, } - other = attach_units(strip_units(convert_units(ds, other_units)), other_units) - units = extract_units(ds) + to_convert = { + key: unit if is_compatible(unit, reference) else None + for key, (unit, reference) in zip_mappings(units, other_units) + } + # convert units where possible, then attach all units to the converted dataset + other = attach_units(strip_units(convert_units(ds, to_convert)), other_units) other_units = extract_units(other) + # make sure all units are compatible and only then try to + # convert and compare values equal_ds = all( - is_compatible(units[name], other_units[name]) for name in units.keys() + is_compatible(unit, other_unit) + for _, (unit, other_unit) in zip_mappings(units, other_units) ) and (strip_units(ds).equals(strip_units(convert_units(other, units)))) equal_units = units == other_units expected = equal_ds and (func.name != "identical" or equal_units) @@ -4453,6 +4330,9 @@ def is_compatible(a, b): assert expected == actual + # TODO: eventually use another decorator / wrapper function that + # applies a filter to the parametrize combinations: + # we only need a single test for data @pytest.mark.parametrize( "unit", ( @@ -4463,14 +4343,29 @@ def is_compatible(a, b): pytest.param(unit_registry.m, id="identical_unit"), ), ) - def test_broadcast_like(self, unit, dtype): - array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa - array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_broadcast_like(self, variant, unit, dtype): + variants = { + "data": ((unit_registry.m, unit), (1, 1)), + "dims": ((1, 1), (unit_registry.m, unit)), + } + (data_unit1, data_unit2), (dim_unit1, dim_unit2) = variants.get(variant) - x1 = np.arange(2) * unit_registry.m - x2 = np.arange(2) * unit - y1 = np.array([0]) * unit_registry.m - y2 = np.arange(3) * unit + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1 + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2 + + x1 = np.arange(2) * dim_unit1 + x2 = np.arange(2) * dim_unit2 + y1 = np.array([0]) * dim_unit1 + y2 = np.arange(3) * dim_unit2 ds1 = xr.Dataset( data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1} @@ -4484,7 +4379,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = ds1.broadcast_like(ds2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4497,32 +4393,25 @@ def test_broadcast_like(self, unit, dtype): ), ) def test_broadcast_equals(self, unit, dtype): + # TODO: does this use indexes? 
left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m right_array1 = np.ones(shape=(2,)) * unit - right_array2 = np.ones(shape=(3,)) * unit + right_array2 = np.zeros(shape=(3,)) * unit left = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=left_array1, dims=("x", "y")), - "b": xr.DataArray(data=left_array2, dims=("y", "z")), - } - ) - right = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=right_array1, dims="x"), - "b": xr.DataArray(data=right_array2, dims="y"), - } + {"a": (("x", "y"), left_array1), "b": (("y", "z"), left_array2)}, ) + right = xr.Dataset({"a": ("x", right_array1), "b": ("y", right_array2)}) - units = { - **extract_units(left), - **({} if left_array1.check(unit) else {"a": None, "b": None}), - } - expected = strip_units(left).broadcast_equals( - strip_units(convert_units(right, units)) - ) & left_array1.check(unit) + units = merge_mappings( + extract_units(left), + {} if is_compatible(left_array1, unit) else {"a": None, "b": None}, + ) + expected = is_compatible(left_array1, unit) and strip_units( + left + ).broadcast_equals(strip_units(convert_units(right, units))) actual = left.broadcast_equals(right) assert expected == actual @@ -4532,68 +4421,74 @@ def test_broadcast_equals(self, unit, dtype): (method("unstack"), method("reset_index", "v"), method("reorder_levels")), ids=repr, ) - def test_stacking_stacked(self, func, dtype): - array1 = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_stacking_stacked(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "dims": (1, unit_registry.m), + } + data_unit, dim_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit array2 = ( np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype) - * unit_registry.m + * data_unit ) - x = np.arange(array1.shape[0]) - y = np.arange(array1.shape[1]) - z = np.arange(array2.shape[2]) + x = np.arange(array1.shape[0]) * dim_unit + y = np.arange(array1.shape[1]) * dim_unit + z = np.arange(array2.shape[2]) * dim_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, + data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, coords={"x": x, "y": y, "z": z}, ) + units = extract_units(ds) stacked = ds.stack(v=("x", "y")) - expected = attach_units( - func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m} - ) + expected = attach_units(func(strip_units(stacked)), units) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="does not work with quantities yet") + @pytest.mark.xfail( + reason="stacked dimension's labels have to be hashable, but is a numpy.array" + ) def test_to_stacked_array(self, dtype): - labels = np.arange(5).astype(dtype) * unit_registry.s - arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels} + labels = range(5) * unit_registry.s + arrays = { + name: np.linspace(0, 1, 10).astype(dtype) * unit_registry.m + for name in labels + } - ds = xr.Dataset( - data_vars={ - name: xr.DataArray(data=array, dims="x") - for name, array in arrays.items() - } - ) + ds = 
xr.Dataset({name: ("x", array) for name, array in arrays.items()}) + units = {None: unit_registry.m, "y": unit_registry.s} func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) actual = func(ds).rename(None) - expected = attach_units( - func(strip_units(ds)).rename(None), - {None: unit_registry.m, "y": unit_registry.s}, - ) + expected = attach_units(func(strip_units(ds)).rename(None), units,) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("transpose", "y", "x", "z1", "z2"), - method("stack", a=("x", "y")), + method("stack", u=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), - marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"), - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -4618,20 +4513,19 @@ def test_stacking_reordering(self, func, dtype): ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y", "z1")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z2")), + "a": (("x", "y", "z1"), array1), + "b": (("x", "y", "z2"), array2), }, coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)}, ) + units = extract_units(ds) - expected = attach_units( - func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK} - ) + expected = attach_units(func(strip_units(ds)), units) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") @pytest.mark.parametrize( "indices", ( @@ -4643,22 +4537,14 @@ def test_isel(self, indices, dtype): array1 = np.arange(10).astype(dtype) * unit_registry.s array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa - x = np.arange(len(array1)) * unit_registry.m - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).isel(x=indices), - {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m}, - ) + expected = attach_units(strip_units(ds).isel(x=indices), units) actual = ds.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4675,7 +4561,7 @@ def test_isel(self, indices, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4694,20 +4580,24 @@ def test_sel(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? 
+ if error is not None: with pytest.raises(error): ds.sel(x=values) return expected = attach_units( - strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))), - {"a": array1.units, "b": array2.units, "x": x.units}, + strip_units(ds).sel( + x=strip_units(convert_units(values, {None: unit_registry.m})) + ), + extract_units(ds), ) actual = ds.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4724,7 +4614,7 @@ def test_sel(self, raw_values, unit, error, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4743,9 +4633,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? + if error is not None: with pytest.raises(error): ds.drop_sel(x=values) @@ -4753,12 +4643,14 @@ def test_drop_sel(self, raw_values, unit, error, dtype): expected = attach_units( strip_units(ds).drop_sel( - x=strip_units(convert_units(values, {None: x.units})) + x=strip_units(convert_units(values, {None: unit_registry.m})) ), extract_units(ds), ) actual = ds.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4775,7 +4667,7 @@ def test_drop_sel(self, raw_values, unit, error, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4794,9 +4686,9 @@ def test_loc(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? 
+ if error is not None: with pytest.raises(error): ds.loc[{"x": values}] @@ -4804,12 +4696,14 @@ def test_loc(self, raw_values, unit, error, dtype): expected = attach_units( strip_units(ds).loc[ - {"x": strip_units(convert_units(values, {None: x.units}))} + {"x": strip_units(convert_units(values, {None: unit_registry.m}))} ], - {"a": array1.units, "b": array2.units, "x": x.units}, + extract_units(ds), ) actual = ds.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -4820,14 +4714,34 @@ def test_loc(self, raw_values, unit, error, dtype): ), ids=repr, ) - def test_head_tail_thin(self, func, dtype): - array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_head_tail_thin(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit_a, unit_b), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_a + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_b coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, + "x": np.arange(10) * dim_unit, + "y": np.arange(5) * dim_unit, + "z": np.arange(8) * dim_unit, + "u": ("x", np.linspace(0, 1, 10) * coord_unit), + "v": ("y", np.linspace(1, 2, 5) * coord_unit), + "w": ("z", np.linspace(-1, 0, 8) * coord_unit), } ds = xr.Dataset( @@ -4841,8 +4755,10 @@ def test_head_tail_thin(self, func, dtype): expected = attach_units(func(strip_units(ds)), extract_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) + @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) @pytest.mark.parametrize( "shape", ( @@ -4853,13 +4769,9 @@ def test_head_tail_thin(self, func, dtype): pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), ), ) - def test_squeeze(self, shape, dtype): + def test_squeeze(self, shape, dim, dtype): names = "xyzt" - coords = { - name: np.arange(length).astype(dtype) - * (unit_registry.m if name != "t" else unit_registry.s) - for name, length in zip(names, shape) - } + dim_lengths = dict(zip(names, shape)) array1 = ( np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK ) @@ -4869,74 +4781,59 @@ def test_squeeze(self, shape, dtype): ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])), - "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])), + "a": (tuple(names[: len(shape)]), array1), + "b": (tuple(names[: len(shape)]), array2), }, - coords=coords, ) units = extract_units(ds) - expected = attach_units(strip_units(ds).squeeze(), units) + kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} - actual = ds.squeeze() - assert_equal_with_units(actual, expected) + expected = attach_units(strip_units(ds).squeeze(**kwargs), units) - # try squeezing the dimensions separately - names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) - for name in names: - expected = 
attach_units(strip_units(ds).squeeze(dim=name), units) - actual = ds.squeeze(dim=name) - assert_equal_with_units(actual, expected) + actual = ds.squeeze(**kwargs) - @pytest.mark.xfail(reason="ignores units") + assert_units_equal(expected, actual) + assert_equal(expected, actual) + + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.s, + def test_interp_reindex(self, func, variant, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) - - new_coords = (np.arange(10) + 0.5) * unit + array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit + array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit - if error is not None: - with pytest.raises(error): - ds.interp(x=new_coords) + y = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(8) + 0.5 - units = extract_units(ds) - expected = attach_units( - strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))), - units, + ds = xr.Dataset( + {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) - actual = ds.interp(x=new_coords) + units = extract_units(ds) - assert_equal_with_units(actual, expected) + expected = attach_units(func(strip_units(ds), x=new_x), units) + actual = func(ds, x=new_x) - @pytest.mark.xfail(reason="ignores units") + assert_units_equal(expected, actual) + assert_equal(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -4949,106 +4846,67 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_interp_like(self, unit, error, dtype): - array1 = ( - np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, - } + @pytest.mark.parametrize("func", (method("interp"), method("reindex")), ids=repr) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array1 = np.linspace(-1, 0, 10).astype(dtype) + array2 = np.linspace(0, 1, 10).astype(dtype) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(8) + 0.5) * unit - other = xr.Dataset( - data_vars={ - "c": xr.DataArray(data=np.empty((20, 
10)), dims=("x", "y")), - "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), - }, - coords={ - "x": (np.arange(20) + 0.3) * unit, - "y": (np.arange(10) - 0.2) * unit, - "z": (np.arange(15) + 0.4) * unit, - }, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + units = extract_units(ds) if error is not None: with pytest.raises(error): - ds.interp_like(other) + func(ds, x=new_x) return - units = extract_units(ds) - expected = attach_units( - strip_units(ds).interp_like(strip_units(convert_units(other, units))), units - ) - actual = ds.interp_like(other) + expected = attach_units(func(strip_units(ds), x=new_x), units) + actual = func(ds, x=new_x) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array1 = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.s, + def test_interp_reindex_like(self, func, variant, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) - - new_coords = (np.arange(10) + 0.5) * unit + array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit + array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit - if error is not None: - with pytest.raises(error): - ds.reindex(x=new_coords) + y = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(8) + 0.5 - expected = attach_units( - strip_units(ds).reindex( - x=strip_units(convert_units(new_coords, {None: coords["x"].units})) - ), - extract_units(ds), + ds = xr.Dataset( + {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) - actual = ds.reindex(x=new_coords) + units = extract_units(ds) + + other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) - assert_equal_with_units(actual, expected) + expected = attach_units(func(strip_units(ds), other), units) + actual = func(ds, other) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -5063,54 +4921,32 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array1 = ( - np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) + 
@pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array1 = np.linspace(-1, 0, 10).astype(dtype) + array2 = np.linspace(0, 1, 10).astype(dtype) - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, - } + x = np.arange(10) * unit_registry.m + new_x = (np.arange(8) + 0.5) * unit - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + units = extract_units(ds) - other = xr.Dataset( - data_vars={ - "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), - "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), - }, - coords={ - "x": (np.arange(20) + 0.3) * unit, - "y": (np.arange(10) - 0.2) * unit, - "z": (np.arange(15) + 0.4) * unit, - }, - ) + other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) if error is not None: with pytest.raises(error): - ds.reindex_like(other) + func(ds, other) return - units = extract_units(ds) - expected = attach_units( - strip_units(ds).reindex_like(strip_units(convert_units(other, units))), - units, - ) - actual = ds.reindex_like(other) + expected = attach_units(func(strip_units(ds), other), units) + actual = func(ds, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -5120,30 +4956,46 @@ def test_reindex_like(self, unit, error, dtype): method("integrate", coord="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="nanquantile not implemented yet", + ), ), method("reduce", func=np.sum, dim="x"), method("map", np.fabs), ), ids=repr, ) - def test_computation(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_computation(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 + x = np.arange(4) * dim_unit + y = np.arange(5) * dim_unit + z = np.arange(3) * dim_unit ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims=("x", "y")), "b": xr.DataArray(data=array2, dims=("x", "z")), }, - coords={"x": x, "y": y, "z": z}, + coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) @@ -5151,69 +5003,105 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(ds)), units) actual = func(ds) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("groupby", "x"), - method("groupby_bins", "x", bins=4), + pytest.param( + method("groupby_bins", "x", bins=2), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="needs assert_allclose but that does not work with pint", + ), + ), method("coarsen", x=2), pytest.param( method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( method("rolling_exp", x=3), - marks=pytest.mark.xfail(reason="uses numbagg which strips units"), + marks=pytest.mark.xfail( + reason="numbagg functions are not supported by pint" + ), ), ), ids=repr, ) - def test_computation_objects(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_computation_objects(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 + x = np.arange(4) * dim_unit + y = np.arange(5) * dim_unit + z = np.arange(3) * dim_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, - coords={"x": x, "y": y, "z": z}, + data_vars={"a": (("x", "y"), array1), "b": (("x", "z"), array2)}, + coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) args = [] if func.name != "groupby" else ["y"] - reduce_func = method("mean", *args) - expected = attach_units(reduce_func(func(strip_units(ds))), units) - actual = reduce_func(func(ds)) + expected = attach_units(func(strip_units(ds)).mean(*args), units) + actual = func(ds).mean(*args) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + # TODO: remove once pint 0.12 has been released + if LooseVersion(pint.__version__) <= "0.12": + assert_equal(expected, actual) + else: + assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_resample(self, variant, dtype): + # TODO: move this to test_computation_objects + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2 - def test_resample(self, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) t = pd.date_range("10-09-2010", 
periods=array1.shape[0], freq="1y") - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + y = np.arange(5) * dim_unit + z = np.arange(8) * dim_unit + + u = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("time", "y")), - "b": xr.DataArray(data=array2, dims=("time", "z")), - }, - coords={"time": t, "y": y, "z": z}, + data_vars={"a": (("time", "y"), array1), "b": (("time", "z"), array2)}, + coords={"time": t, "y": y, "z": z, "u": ("y", u)}, ) units = extract_units(ds) @@ -5222,43 +5110,59 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(ds)).mean(), units) actual = func(ds).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("assign", c=lambda ds: 10 * ds.b), - method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), + method("assign_coords", v=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="nanquantile not implemented", + ), ), ), ids=repr, ) - def test_grouped_operations(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_grouped_operations(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 + x = np.arange(5) * dim_unit + y = np.arange(4) * dim_unit + z = np.arange(3) * dim_unit + + u = np.linspace(-1, 0, 4) * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, - coords={"x": x, "y": y, "z": z}, + data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, + coords={"x": x, "y": y, "z": z, "u": ("y", u)}, ) - units = extract_units(ds) - units.update({"c": unit_registry.Pa, "v": unit_registry.s}) + + assigned_units = {"c": unit2, "v": unit_registry.s} + units = merge_mappings(extract_units(ds), assigned_units) stripped_kwargs = { name: strip_units(value) for name, value in func.kwargs.items() @@ -5268,20 +5172,26 @@ def test_grouped_operations(self, func, dtype): ) actual = func(ds.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("pipe", lambda ds: ds * 10), method("assign", d=lambda ds: ds.b * 10), - method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)), + method("assign_coords", y2=("y", np.arange(4) * unit_registry.mm)), method("assign_attrs", attr1="value"), method("rename", 
x2="x_mm"), method("rename_vars", c="temperature"), method("rename_dims", x="offset_x"), - method("swap_dims", {"x": "x2"}), - method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), + method("swap_dims", {"x": "u"}), + pytest.param( + method( + "expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1 + ), + marks=pytest.mark.xfail(reason="indexes don't support units"), + ), method("drop_vars", "x"), method("drop_dims", "z"), method("set_coords", names="c"), @@ -5290,40 +5200,55 @@ def test_grouped_operations(self, func, dtype): ), ids=repr, ) - def test_content_manipulation(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) - * unit_registry.m ** 3 - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_content_manipulation(self, func, variant, dtype): + variants = { + "data": ( + (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK), + 1, + 1, + ), + "dims": ((1, 1, 1), unit_registry.m, 1), + "coords": ((1, 1, 1), 1, unit_registry.m), + } + (unit1, unit2, unit3), dim_unit, coord_unit = variants.get(variant) - x = np.arange(10) * unit_registry.m - x2 = x.to(unit_registry.mm) - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 + array3 = np.linspace(0, 10, 5).astype(dtype) * unit3 + + x = np.arange(5) * dim_unit + y = np.arange(4) * dim_unit + z = np.arange(3) * dim_unit + + x2 = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - "c": xr.DataArray(data=array3, dims="x"), + "a": (("x", "y"), array1), + "b": (("x", "y", "z"), array2), + "c": ("x", array3), }, coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, ) - units = { - **extract_units(ds), - **{ - "y2": unit_registry.mm, - "x_mm": unit_registry.mm, - "offset_x": unit_registry.m, - "d": unit_registry.Pa, - "temperature": unit_registry.degK, - }, + + new_units = { + "y2": unit_registry.mm, + "x_mm": coord_unit, + "offset_x": unit_registry.m, + "d": unit2, + "temperature": unit3, } + units = merge_mappings(extract_units(ds), new_units) stripped_kwargs = { key: strip_units(value) for key, value in func.kwargs.items() @@ -5331,7 +5256,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -5356,25 +5282,29 @@ def test_content_manipulation(self, func, dtype): ), ) def test_merge(self, variant, unit, error, dtype): - original_data_unit = unit_registry.m - original_dim_unit = unit_registry.m - original_coord_unit = unit_registry.m + left_variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.m), + } - variants = { - "data": (unit, original_dim_unit, original_coord_unit), - "dims": (original_data_unit, unit, original_coord_unit), - "coords": (original_data_unit, 
original_dim_unit, unit), + left_data_unit, left_dim_unit, left_coord_unit = left_variants.get(variant) + + right_variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), } - data_unit, dim_unit, coord_unit = variants.get(variant) + right_data_unit, right_dim_unit, right_coord_unit = right_variants.get(variant) - left_array = np.arange(10).astype(dtype) * original_data_unit - right_array = np.arange(-5, 5).astype(dtype) * data_unit + left_array = np.arange(10).astype(dtype) * left_data_unit + right_array = np.arange(-5, 5).astype(dtype) * right_data_unit - left_dim = np.arange(10, 20) * original_dim_unit - right_dim = np.arange(5, 15) * dim_unit + left_dim = np.arange(10, 20) * left_dim_unit + right_dim = np.arange(5, 15) * right_dim_unit - left_coord = np.arange(-10, 0) * original_coord_unit - right_coord = np.arange(-15, -5) * coord_unit + left_coord = np.arange(-10, 0) * left_coord_unit + right_coord = np.arange(-15, -5) * right_coord_unit left = xr.Dataset( data_vars={"a": ("x", left_array)}, @@ -5397,4 +5327,5 @@ def test_merge(self, variant, unit, error, dtype): expected = attach_units(strip_units(left).merge(strip_units(converted)), units) actual = left.merge(right) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) From b9e6a36ff7a0ca3593165cf191f4152666fa4a66 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 17 Jun 2020 22:45:10 -0700 Subject: [PATCH 058/342] Revise pull request template (#4039) * Revise pull request template See below for the new language, to clarify that documentation is only necessary for "user visible changes." I added "including notable bug fixes" to indicate that minor bug fixes may not be worth noting (I was thinking of test-suite only fixes in this category) but perhaps that is too confusing. * remove line break * Update releasing notes --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- HOW_TO_RELEASE.md | 53 ++++++++++++++++---------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c9c0b720c35 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API + - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` + - [ ] New functions/methods are listed in `api.rst` diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 3fdd1d7236d..c890d61d966 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -How to issue an xarray release in 16 easy steps +# How to issue an xarray release in 17 easy steps Time required: about an hour. @@ -6,7 +6,16 @@ Time required: about an hour. ``` git pull upstream master ``` - 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 2. Get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + or by substituting the _previous_ release in: + ``` + git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + Add these into `whats-new.rst` somewhere :) + 3. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. 
Things to watch out for: @@ -16,41 +25,41 @@ Time required: about an hour. due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. - 3. If you have any doubts, run the full test suite one final time! + 4. If you have any doubts, run the full test suite one final time! ``` pytest ``` - 4. Check that the ReadTheDocs build is passing. - 5. On the master branch, commit the release in git: + 5. Check that the ReadTheDocs build is passing. + 6. On the master branch, commit the release in git: ``` git commit -am 'Release v0.X.Y' ``` - 6. Tag the release: + 7. Tag the release: ``` git tag -a v0.X.Y -m 'v0.X.Y' ``` - 7. Build source and binary wheels for pypi: + 8. Build source and binary wheels for pypi: ``` git clean -xdf # this deletes all uncommited changes! python setup.py bdist_wheel sdist ``` - 8. Use twine to check the package build: + 9. Use twine to check the package build: ``` twine check dist/xarray-0.X.Y* ``` - 9. Use twine to register and upload the release on pypi. Be careful, you can't +10. Use twine to register and upload the release on pypi. Be careful, you can't take this back! ``` twine upload dist/xarray-0.X.Y* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -10. Push your changes to master: +11. Push your changes to master: ``` git push upstream master git push upstream --tags ``` -11. Update the stable branch (used by ReadTheDocs) and switch back to master: +12. Update the stable branch (used by ReadTheDocs) and switch back to master: ``` git checkout stable git rebase master @@ -60,7 +69,7 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherrypick` for documentation only fixes that apply the current released version.) -12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: +13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: ``` .. _whats-new.0.X.Y+1: @@ -86,19 +95,19 @@ Time required: about an hour. Internal Changes ~~~~~~~~~~~~~~~~ ``` -13. Commit your changes and push to master again: +14. Commit your changes and push to master again: ``` git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -14. Issue the release on GitHub. Click on "Draft a new release" at +15. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but don't bother to describe it -- we maintain that on the docs instead. -15. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -16. Issue the release announcement! For bug fix releases, I usually only email +17. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -109,18 +118,8 @@ Time required: about an hour. Google search will turn up examples of prior release announcements (look for "ANN xarray"). - You can get a list of contributors with: - ``` - git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." 
--format="%aN" | sort -u - ``` - or by substituting the _previous_ release in: - ``` - git log v0.X.Y-1.. --format="%aN" | sort -u - ``` - NB: copying this output into a Google Groups form can cause - [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care -Note on version numbering: +## Note on version numbering We follow a rough approximation of semantic version. Only major releases (0.X.0) should include breaking changes. Minor releases (0.X.Y) are for bug fixes and From 2a8cd3b0545851cff2773d493e30d5c84aa1c4db Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 23 Jun 2020 00:51:56 +0200 Subject: [PATCH 059/342] use builtin python types instead of the numpy alias (#4170) * replace np.bool with the python type * replace np.int with the python type * replace np.complex with the builtin python type * replace np.float with the builtin python type --- xarray/coding/times.py | 4 ++-- xarray/conventions.py | 2 +- xarray/core/common.py | 2 +- xarray/core/formatting.py | 2 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_conventions.py | 6 ++---- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 10 +++++----- xarray/tests/test_dtypes.py | 4 ++-- xarray/tests/test_plot.py | 4 ++-- 10 files changed, 18 insertions(+), 20 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index dafa8ca03b1..77b2d2c7937 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -158,7 +158,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) if ( @@ -179,7 +179,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = cftime_to_nptime(dates) elif use_cftime: dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/conventions.py b/xarray/conventions.py index 588fcea71a3..fc0572944f3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -116,7 +116,7 @@ def maybe_default_fill_value(var): def maybe_encode_bools(var): if ( - (var.dtype == np.bool) + (var.dtype == bool) and ("dtype" not in var.encoding) and ("dtype" not in var.attrs) ): diff --git a/xarray/core/common.py b/xarray/core/common.py index e343f342040..f759f4c32dd 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1481,7 +1481,7 @@ def zeros_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> xr.zeros_like(x, dtype=np.float) + >>> xr.zeros_like(x, dtype=float) array([[0., 0., 0.], [0., 0., 0.]]) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index bd9576a4440..3a9dd772a9f 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -140,7 +140,7 @@ def format_item(x, timedelta_format=None, quote_strings=True): return format_timedelta(x, timedelta_format=timedelta_format) elif isinstance(x, (str, bytes)): return repr(x) if quote_strings else x - elif isinstance(x, (float, np.float)): + elif isinstance(x, (float, np.float_)): return f"{x:.4}" else: return str(x) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3642c1eb9b7..177435fa864 100644 --- 
a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -885,7 +885,7 @@ def test_roundtrip_endian(self): "x": np.arange(3, 10, dtype=">i2"), "y": np.arange(3, 20, dtype=" Date: Wed, 24 Jun 2020 16:41:05 +0200 Subject: [PATCH 060/342] Proposal for better error message about in-place operation (#3976) * Improve error message: automatic alignment during in-place operation. * Sorted imports. * Fix tests. * Add suggestions from S. Hoyer. --- xarray/core/dataarray.py | 13 ++++++++++--- xarray/tests/test_dataarray.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5814c828663..b0df874953b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer -from .merge import PANDAS_TYPES, _extract_indexes_from_coords +from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -2713,8 +2713,15 @@ def func(self, other): # don't support automatic alignment with in-place arithmetic. other_coords = getattr(other, "coords", None) other_variable = getattr(other, "variable", other) - with self.coords._merge_inplace(other_coords): - f(self.variable, other_variable) + try: + with self.coords._merge_inplace(other_coords): + f(self.variable, other_variable) + except MergeError as exc: + raise MergeError( + "Automatic alignment is not supported for in-place operations.\n" + "Consider aligning the indices manually or using a not-in-place operation.\n" + "See https://github.com/pydata/xarray/issues/3910 for more explanations." + ) from exc return self return func diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 36bee63bf3b..8fc37ac458d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1930,9 +1930,9 @@ def test_inplace_math_basics(self): def test_inplace_math_automatic_alignment(self): a = DataArray(range(5), [("x", range(5))]) b = DataArray(range(1, 6), [("x", range(1, 6))]) - with pytest.raises(xr.MergeError): + with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): a += b - with pytest.raises(xr.MergeError): + with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): b += a def test_math_name(self): From a2dac231cd946893d9fc51219b0c053e04fa7fb7 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 24 Jun 2020 08:44:59 -0700 Subject: [PATCH 061/342] Remove
<pre> from nested HTML repr (#4171)
    
Using `<pre>` messes up the display of nested HTML reprs, e.g., from dask. Now
we only use the `<pre>` tag when displaying text.
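
A minimal, self-contained illustration of the behaviour described above (``FancyData`` is a
made-up stand-in for an object, such as a dask array, that ships its own ``_repr_html_``; it
is not part of xarray):

```python
from html import escape


class FancyData:
    """Made-up stand-in for an object (e.g. a dask array) with its own HTML repr."""

    def _repr_html_(self):
        return "<table><tr><td>chunked data</td></tr></table>"


plain_text = "array([1, 2, 3])"

# old behaviour: everything went inside <pre>, so nested HTML reprs were
# wrapped too, which messes up how they display
old_repr = f"<pre>{FancyData()._repr_html_()}</pre>"

# new behaviour: only plain text is escaped and wrapped in <pre>;
# objects providing _repr_html_ are passed through untouched
new_text_repr = f"<pre>{escape(plain_text)}</pre>"
new_fancy_repr = FancyData()._repr_html_()
```
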
    ---
     xarray/core/formatting_html.py       | 8 +++++---
     xarray/tests/test_formatting_html.py | 2 +-
     2 files changed, 6 insertions(+), 4 deletions(-)
    
    diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
    index 69832d6ca3d..c99683e91c7 100644
    --- a/xarray/core/formatting_html.py
    +++ b/xarray/core/formatting_html.py
    @@ -20,7 +20,9 @@ def short_data_repr_html(array):
         internal_data = getattr(array, "variable", array)._data
         if hasattr(internal_data, "_repr_html_"):
             return internal_data._repr_html_()
    -    return escape(short_data_repr(array))
    +    else:
    +        text = escape(short_data_repr(array))
    +        return f"
    {text}
    " def format_dims(dims, coord_names): @@ -123,7 +125,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): f"" f"
    {attrs_ul}
    " - f"
    {data_repr}
    " + f"
    {data_repr}
    " ) @@ -193,7 +195,7 @@ def array_section(obj): f"" f"" f"
    {preview}
    " - f"
    {data_repr}
    " + f"
    {data_repr}
    " "
    " ) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 90e74f1f78f..ea636403318 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -48,7 +48,7 @@ def dataset(): def test_short_data_repr_html(dataarray): data_repr = fh.short_data_repr_html(dataarray) - assert data_repr.startswith("array") + assert data_repr.startswith("
    array")
     
     
     def test_short_data_repr_html_non_str_keys(dataset):
    
    From f281b3b62712079605d0f873c2f38623212bdef0 Mon Sep 17 00:00:00 2001
    From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    Date: Wed, 24 Jun 2020 12:04:11 -0400
    Subject: [PATCH 062/342] Limit length of dataarray reprs (#3905)
    
    * limit length of dataarray reprs
    
    * repr depends on numpy versions
    
    * whatsnew
    
    * correct comment based on @keewis comment
    
    * Update whats-new.rst
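
For context, a rough standalone sketch of the ``limit_lines`` helper introduced below, and how
it collapses a long repr (the sample text is made up):

```python
from itertools import chain


def limit_lines(string: str, *, limit: int):
    # if the string has more lines than ``limit``, keep the first and last
    # ``limit // 2`` lines and put an ellipsis in between
    lines = string.splitlines()
    if len(lines) > limit:
        string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :]))
    return string


sample = "\n".join(str(i) for i in range(100))
print(limit_lines(sample, limit=6))
# 0
# 1
# 2
# ...
# 97
# 98
# 99
```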
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst               |  4 +++-
     xarray/core/formatting.py       | 15 +++++++++++++--
     xarray/tests/test_formatting.py | 13 +++++++++++--
     3 files changed, 27 insertions(+), 5 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 4b5bb1e491f..ea3e32d3a80 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -66,6 +66,9 @@ New Features
     - Limited the length of array items with long string reprs to a
       reasonable width (:pull:`3900`)
       By `Maximilian Roos `_
+- Limited the number of lines shown for large arrays when their numpy repr would have more than 40 lines.
    +  (:pull:`3905`)
    +  By `Maximilian Roos `_
     - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
       By `Todd Jennings `_
    @@ -96,7 +99,6 @@ New Features
       By `Deepak Cherian `_
     - :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`)
       By `Deepak Cherian `_
    -
     - Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`,
       (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`,
       :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas
    diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
    index 3a9dd772a9f..28eaae5f05b 100644
    --- a/xarray/core/formatting.py
    +++ b/xarray/core/formatting.py
    @@ -3,7 +3,7 @@
     import contextlib
     import functools
     from datetime import datetime, timedelta
    -from itertools import zip_longest
    +from itertools import chain, zip_longest
     from typing import Hashable
     
     import numpy as np
    @@ -422,6 +422,17 @@ def set_numpy_options(*args, **kwargs):
             np.set_printoptions(**original)
     
     
    +def limit_lines(string: str, *, limit: int):
    +    """
    +    If the string is more lines than the limit,
    +    this returns the middle lines replaced by an ellipsis
    +    """
    +    lines = string.splitlines()
    +    if len(lines) > limit:
    +        string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :]))
    +    return string
    +
    +
     def short_numpy_repr(array):
         array = np.asarray(array)
     
    @@ -447,7 +458,7 @@ def short_data_repr(array):
         elif hasattr(internal_data, "__array_function__") or isinstance(
             internal_data, dask_array_type
         ):
    -        return repr(array.data)
    +        return limit_lines(repr(array.data), limit=40)
         elif array._in_memory or array.size < 1e5:
             return short_numpy_repr(array)
         else:
    diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
    index 6881c0bc0ff..82de8080c80 100644
    --- a/xarray/tests/test_formatting.py
    +++ b/xarray/tests/test_formatting.py
    @@ -405,10 +405,19 @@ def test_short_numpy_repr():
             np.random.randn(20, 20),
             np.random.randn(5, 10, 15),
             np.random.randn(5, 10, 15, 3),
    +        np.random.randn(100, 5, 1),
         ]
         # number of lines:
    -    # for default numpy repr: 167, 140, 254, 248
    -    # for short_numpy_repr: 1, 7, 24, 19
    +    # for default numpy repr: 167, 140, 254, 248, 599
    +    # for short_numpy_repr: 1, 7, 24, 19, 25
         for array in cases:
             num_lines = formatting.short_numpy_repr(array).count("\n") + 1
             assert num_lines < 30
    +
    +
    +def test_large_array_repr_length():
    +
    +    da = xr.DataArray(np.random.randn(100, 5, 1))
    +
    +    result = repr(da).splitlines()
    +    assert len(result) < 50
    
    From 24d755d59421fd0eaf22ad109408275d2bfb8216 Mon Sep 17 00:00:00 2001
    From: johnomotani 
    Date: Wed, 24 Jun 2020 19:22:18 +0100
    Subject: [PATCH 063/342] Fix 4009 (#4173)
    
    * Test attrs handling in open_mfdataset
    
    * Fix attrs handling in open_mfdataset()
    
Need to pass combine_attrs="drop" so that attrs_file can set the attrs of the
combined dataset (see the sketch below).
    
    * Update whats-new.rst
    
    * Update doc/whats-new.rst
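
A rough usage sketch of the fixed behaviour (file names and attribute values are made up;
assumes dask and a netCDF backend are installed and the working directory is writable):

```python
import numpy as np
import xarray as xr

# two files whose global attrs disagree
ds0 = xr.Dataset({"v1": ("t", np.arange(3.0))}, coords={"t": [0, 1, 2]})
ds0.attrs["test_dataset_attr"] = 10
ds1 = xr.Dataset({"v1": ("t", np.arange(3.0))}, coords={"t": [3, 4, 5]})
ds1.attrs["test_dataset_attr"] = 11

ds0.to_netcdf("file0.nc")
ds1.to_netcdf("file1.nc")

# conflicting attrs are dropped while combining instead of raising;
# the global attrs of the result come from the first file (the default attrs_file)
with xr.open_mfdataset(["file0.nc", "file1.nc"], combine="by_coords") as ds:
    assert ds.attrs["test_dataset_attr"] == 10
```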
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst             |  2 ++
     xarray/backends/api.py        |  8 +++++++-
     xarray/tests/test_backends.py | 30 ++++++++++++++++++++++++++++++
     3 files changed, 39 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index ea3e32d3a80..bf57f5e951d 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -107,6 +107,8 @@ New Features
     
     Bug fixes
     ~~~~~~~~~
    +- Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`)
    +  By `John Omotani `_
     - If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`)
       By `Phil Butcher `_.
     - Support dark mode in VS code (:issue:`4024`)
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 0919d2a582b..4077d7a02c8 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -967,12 +967,18 @@ def open_mfdataset(
                     coords=coords,
                     ids=ids,
                     join=join,
    +                combine_attrs="drop",
                 )
             elif combine == "by_coords":
                 # Redo ordering from coordinates, ignoring how they were ordered
                 # previously
                 combined = combine_by_coords(
    -                datasets, compat=compat, data_vars=data_vars, coords=coords, join=join
    +                datasets,
    +                compat=compat,
    +                data_vars=data_vars,
    +                coords=coords,
    +                join=join,
    +                combine_attrs="drop",
                 )
             else:
                 raise ValueError(
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 177435fa864..1e33eccb83e 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -2662,6 +2662,36 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join):
                     ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join)
                     assert_identical(ds, ds_expect)
     
    +    def test_open_mfdataset_dataset_attr_by_coords(self):
    +        """
    +        Case when an attribute differs across the multiple files
    +        """
    +        with self.setup_files_and_datasets() as (files, [ds1, ds2]):
    +            # Give the files an inconsistent attribute
    +            for i, f in enumerate(files):
    +                ds = open_dataset(f).load()
    +                ds.attrs["test_dataset_attr"] = 10 + i
    +                ds.close()
    +                ds.to_netcdf(f)
    +
    +            with xr.open_mfdataset(files, combine="by_coords", concat_dim="t") as ds:
    +                assert ds.test_dataset_attr == 10
    +
    +    def test_open_mfdataset_dataarray_attr_by_coords(self):
    +        """
    +        Case when an attribute of a member DataArray differs across the multiple files
    +        """
    +        with self.setup_files_and_datasets() as (files, [ds1, ds2]):
    +            # Give the files an inconsistent attribute
    +            for i, f in enumerate(files):
    +                ds = open_dataset(f).load()
    +                ds["v1"].attrs["test_dataarray_attr"] = i
    +                ds.close()
    +                ds.to_netcdf(f)
    +
    +            with xr.open_mfdataset(files, combine="by_coords", concat_dim="t") as ds:
    +                assert ds["v1"].test_dataarray_attr == 0
    +
         @pytest.mark.parametrize("combine", ["nested", "by_coords"])
         @pytest.mark.parametrize("opt", ["all", "minimal", "different"])
         def test_open_mfdataset_exact_join_raises_error(self, combine, opt):
    
    From 3088de25987f6863ba6c7a73b23a7ca7a8c93a69 Mon Sep 17 00:00:00 2001
    From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
    Date: Wed, 24 Jun 2020 19:22:54 +0100
    Subject: [PATCH 064/342] Remove old auto combine (#3926)
    
    * Removed auto_combine function and argument to open_mfdataset
    
    * Removed corresponding tests
    
    * Code formatting
    
    * updated what's new
    
    * PEP8 fixes
    
    * Update doc/whats-new.rst
    
    `:py:func:` links fixed
    
    Co-Authored-By: keewis 
    
    * removed auto_combine from API docs
    
    * clarify that auto_combine is completely removed
    
    * concat_dim=None by default for combine='nested'
    
    * fix black formatting
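
For anyone migrating away from ``auto_combine``, a minimal sketch of the two replacement
functions (the datasets are made up):

```python
import numpy as np
import xarray as xr

ds0 = xr.Dataset({"foo": ("x", np.zeros(3))}, coords={"x": [0, 1, 2]})
ds1 = xr.Dataset({"foo": ("x", np.ones(3))}, coords={"x": [3, 4, 5]})

# order is inferred from the dimension coordinate values
by_coords = xr.combine_by_coords([ds1, ds0])

# order is taken from the (possibly nested) list structure instead
nested = xr.combine_nested([ds0, ds1], concat_dim="x")

# open_mfdataset now defaults to combine="by_coords"; pass combine="nested"
# together with concat_dim to keep the old list-order behaviour, e.g.
# xr.open_mfdataset("files*.nc", combine="nested", concat_dim="x")
```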
    
    Co-authored-by: keewis 
    Co-authored-by: dcherian 
    ---
     doc/api-hidden.rst            |   2 -
     doc/api.rst                   |   1 -
     doc/whats-new.rst             |   7 +
     xarray/__init__.py            |   3 +-
     xarray/backends/api.py        |  61 ++------
     xarray/core/combine.py        | 271 ----------------------------------
     xarray/tests/test_backends.py |  86 ++---------
     xarray/tests/test_combine.py  | 176 +---------------------
     8 files changed, 34 insertions(+), 573 deletions(-)
    
    diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
    index 313428c29d2..5542e488143 100644
    --- a/doc/api-hidden.rst
    +++ b/doc/api-hidden.rst
    @@ -9,8 +9,6 @@
     .. autosummary::
        :toctree: generated/
     
    -   auto_combine
    -
        Dataset.nbytes
        Dataset.chunks
     
    diff --git a/doc/api.rst b/doc/api.rst
    index bb0edd0dfa5..603e3e8f6cf 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -21,7 +21,6 @@ Top-level functions
        broadcast
        concat
        merge
    -   auto_combine
        combine_by_coords
        combine_nested
        where
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index bf57f5e951d..a4ec85c1950 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -33,6 +33,13 @@ Breaking changes
       `_.
       (:pull:`3274`)
       By `Elliott Sales de Andrade `_
    +- The old :py:func:`auto_combine` function has now been removed in
    +  favour of the :py:func:`combine_by_coords` and
    +  :py:func:`combine_nested` functions. This also means that
+  :py:func:`open_mfdataset` now uses
+  ``combine='by_coords'`` by default. (:issue:`2616`, :pull:`3926`)
    +  By `Tom Nicholas `_.
    +
     
     Enhancements
     ~~~~~~~~~~~~
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index cb4824d188d..3886edc60e6 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -16,7 +16,7 @@
     from .coding.frequencies import infer_freq
     from .conventions import SerializationWarning, decode_cf
     from .core.alignment import align, broadcast
    -from .core.combine import auto_combine, combine_by_coords, combine_nested
    +from .core.combine import combine_by_coords, combine_nested
     from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
     from .core.computation import apply_ufunc, corr, cov, dot, polyval, where
     from .core.concat import concat
    @@ -47,7 +47,6 @@
         "align",
         "apply_ufunc",
         "as_variable",
    -    "auto_combine",
         "broadcast",
         "cftime_range",
         "combine_by_coords",
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 4077d7a02c8..8d7c2230b2d 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -4,7 +4,6 @@
     from io import BytesIO
     from numbers import Number
     from pathlib import Path
    -from textwrap import dedent
     from typing import (
         TYPE_CHECKING,
         Callable,
    @@ -23,7 +22,6 @@
     from ..core.combine import (
         _infer_concat_order_from_positions,
         _nested_combine,
    -    auto_combine,
         combine_by_coords,
     )
     from ..core.dataarray import DataArray
    @@ -726,14 +724,14 @@ def close(self):
     def open_mfdataset(
         paths,
         chunks=None,
    -    concat_dim="_not_supplied",
    +    concat_dim=None,
         compat="no_conflicts",
         preprocess=None,
         engine=None,
         lock=None,
         data_vars="all",
         coords="different",
    -    combine="_old_auto",
    +    combine="by_coords",
         autoclose=None,
         parallel=False,
         join="outer",
    @@ -746,9 +744,8 @@ def open_mfdataset(
         the datasets into one before returning the result, and if combine='nested' then
         ``combine_nested`` is used. The filepaths must be structured according to which
         combining function is used, the details of which are given in the documentation for
    -    ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
    -    ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
    -    ``combine='nested'`` in future. Requires dask to be installed. See documentation for
    +    ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'``
    +    will be used. Requires dask to be installed. See documentation for
         details on dask [1]_. Global attributes from the ``attrs_file`` are used
         for the combined dataset.
     
    @@ -758,7 +755,7 @@ def open_mfdataset(
             Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of
             files to open. Paths can be given as strings or as pathlib Paths. If
             concatenation along more than one dimension is desired, then ``paths`` must be a
    -        nested list-of-lists (see ``manual_combine`` for details). (A string glob will
    +        nested list-of-lists (see ``combine_nested`` for details). (A string glob will
             be expanded to a 1-dimensional list.)
         chunks : int or dict, optional
             Dictionary with keys given by dimension names and values given by chunk sizes.
    @@ -768,15 +765,16 @@ def open_mfdataset(
             see the full documentation for more details [2]_.
         concat_dim : str, or list of str, DataArray, Index or None, optional
             Dimensions to concatenate files along.  You only need to provide this argument
    -        if any of the dimensions along which you want to concatenate is not a dimension
    -        in the original datasets, e.g., if you want to stack a collection of 2D arrays
    -        along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to
    -        disable concatenation along a particular dimension.
+        if ``combine='nested'``, and if any of the dimensions along which you want to
    +        concatenate is not a dimension in the original datasets, e.g., if you want to
    +        stack a collection of 2D arrays along a third dimension. Set
    +        ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a
    +        particular dimension. Default is None, which for a 1D list of filepaths is
    +        equivalent to opening the files separately and then merging them with
    +        ``xarray.merge``.
         combine : {'by_coords', 'nested'}, optional
             Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to
    -        combine all the data. If this argument is not provided, `xarray.auto_combine` is
    -        used, but in the future this behavior will switch to use
    -        `xarray.combine_by_coords` by default.
    +        combine all the data. Default is to use ``xarray.combine_by_coords``.
         compat : {'identical', 'equals', 'broadcast_equals',
                   'no_conflicts', 'override'}, optional
             String indicating how to compare variables of the same name for
    @@ -869,7 +867,6 @@ def open_mfdataset(
         --------
         combine_by_coords
         combine_nested
    -    auto_combine
         open_dataset
     
         References
    @@ -897,11 +894,8 @@ def open_mfdataset(
         # If combine='nested' then this creates a flat list which is easier to
         # iterate over, while saving the originally-supplied structure as "ids"
         if combine == "nested":
    -        if str(concat_dim) == "_not_supplied":
    -            raise ValueError("Must supply concat_dim when using " "combine='nested'")
    -        else:
    -            if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
    -                concat_dim = [concat_dim]
    +        if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
    +            concat_dim = [concat_dim]
         combined_ids_paths = _infer_concat_order_from_positions(paths)
         ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values()))
     
    @@ -933,30 +927,7 @@ def open_mfdataset(
     
         # Combine all datasets, closing them in case of a ValueError
         try:
    -        if combine == "_old_auto":
    -            # Use the old auto_combine for now
    -            # Remove this after deprecation cycle from #2616 is complete
    -            basic_msg = dedent(
    -                """\
    -            In xarray version 0.15 the default behaviour of `open_mfdataset`
    -            will change. To retain the existing behavior, pass
    -            combine='nested'. To use future default behavior, pass
    -            combine='by_coords'. See
    -            http://xarray.pydata.org/en/stable/combining.html#combining-multi
    -            """
    -            )
    -            warnings.warn(basic_msg, FutureWarning, stacklevel=2)
    -
    -            combined = auto_combine(
    -                datasets,
    -                concat_dim=concat_dim,
    -                compat=compat,
    -                data_vars=data_vars,
    -                coords=coords,
    -                join=join,
    -                from_openmfds=True,
    -            )
    -        elif combine == "nested":
    +        if combine == "nested":
                 # Combined nested list by successive concat and merge operations
                 # along each dimension, using structure given by "ids"
                 combined = _nested_combine(
    diff --git a/xarray/core/combine.py b/xarray/core/combine.py
    index 1f990457798..58bd7178fa2 100644
    --- a/xarray/core/combine.py
    +++ b/xarray/core/combine.py
    @@ -1,7 +1,5 @@
     import itertools
    -import warnings
     from collections import Counter
    -from textwrap import dedent
     
     import pandas as pd
     
    @@ -762,272 +760,3 @@ def combine_by_coords(
             join=join,
             combine_attrs=combine_attrs,
         )
    -
    -
    -# Everything beyond here is only needed until the deprecation cycle in #2616
    -# is completed
    -
    -
    -_CONCAT_DIM_DEFAULT = "__infer_concat_dim__"
    -
    -
    -def auto_combine(
    -    datasets,
    -    concat_dim="_not_supplied",
    -    compat="no_conflicts",
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -    from_openmfds=False,
    -):
    -    """
    -    Attempt to auto-magically combine the given datasets into one.
    -
    -    This entire function is deprecated in favour of ``combine_nested`` and
    -    ``combine_by_coords``.
    -
    -    This method attempts to combine a list of datasets into a single entity by
    -    inspecting metadata and using a combination of concat and merge.
    -    It does not concatenate along more than one dimension or sort data under
    -    any circumstances. It does align coordinates, but different variables on
    -    datasets can cause it to fail under some scenarios. In complex cases, you
    -    may need to clean up your data and use ``concat``/``merge`` explicitly.
    -    ``auto_combine`` works well if you have N years of data and M data
    -    variables, and each combination of a distinct time period and set of data
    -    variables is saved its own dataset.
    -
    -    Parameters
    -    ----------
    -    datasets : sequence of xarray.Dataset
    -        Dataset objects to merge.
    -    concat_dim : str or DataArray or Index, optional
    -        Dimension along which to concatenate variables, as used by
    -        :py:func:`xarray.concat`. You only need to provide this argument if
    -        the dimension along which you want to concatenate is not a dimension
    -        in the original datasets, e.g., if you want to stack a collection of
    -        2D arrays along a third dimension.
    -        By default, xarray attempts to infer this argument by examining
    -        component files. Set ``concat_dim=None`` explicitly to disable
    -        concatenation.
    -    compat : {'identical', 'equals', 'broadcast_equals',
    -             'no_conflicts', 'override'}, optional
    -        String indicating how to compare variables of the same name for
    -        potential conflicts:
    -
    -        - 'broadcast_equals': all values must be equal when variables are
    -          broadcast against each other to ensure common dimensions.
    -        - 'equals': all values and dimensions must be the same.
    -        - 'identical': all values, dimensions and attributes must be the
    -          same.
    -        - 'no_conflicts': only values which are not null in both datasets
    -          must be equal. The returned dataset then contains the combination
    -          of all non-null values.
    -        - 'override': skip comparing and pick variable from first dataset
    -    data_vars : {'minimal', 'different', 'all' or list of str}, optional
    -        Details are in the documentation of concat
    -    coords : {'minimal', 'different', 'all' o list of str}, optional
    -        Details are in the documentation of concat
    -    fill_value : scalar, optional
    -        Value to use for newly missing values
    -    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
    -        String indicating how to combine differing indexes
    -        (excluding concat_dim) in objects
    -
    -        - 'outer': use the union of object indexes
    -        - 'inner': use the intersection of object indexes
    -        - 'left': use indexes from the first object with each dimension
    -        - 'right': use indexes from the last object with each dimension
    -        - 'exact': instead of aligning, raise `ValueError` when indexes to be
    -          aligned are not equal
    -        - 'override': if indexes are of same size, rewrite indexes to be
    -          those of the first object with that dimension. Indexes for the same
    -          dimension must have the same size in all objects.
    -
    -    Returns
    -    -------
    -    combined : xarray.Dataset
    -
    -    See also
    -    --------
    -    concat
    -    Dataset.merge
    -    """
    -
    -    if not from_openmfds:
    -        basic_msg = dedent(
    -            """\
    -        In xarray version 0.15 `auto_combine` will be deprecated. See
    -        http://xarray.pydata.org/en/stable/combining.html#combining-multi"""
    -        )
    -        warnings.warn(basic_msg, FutureWarning, stacklevel=2)
    -
    -    if concat_dim == "_not_supplied":
    -        concat_dim = _CONCAT_DIM_DEFAULT
    -        message = ""
    -    else:
    -        message = dedent(
    -            """\
    -        Also `open_mfdataset` will no longer accept a `concat_dim` argument.
    -        To get equivalent behaviour from now on please use the new
    -        `combine_nested` function instead (or the `combine='nested'` option to
    -        `open_mfdataset`)."""
    -        )
    -
    -    if _dimension_coords_exist(datasets):
    -        message += dedent(
    -            """\
    -        The datasets supplied have global dimension coordinates. You may want
    -        to use the new `combine_by_coords` function (or the
    -        `combine='by_coords'` option to `open_mfdataset`) to order the datasets
    -        before concatenation. Alternatively, to continue concatenating based
    -        on the order the datasets are supplied in future, please use the new
    -        `combine_nested` function (or the `combine='nested'` option to
    -        open_mfdataset)."""
    -        )
    -    else:
    -        message += dedent(
    -            """\
    -        The datasets supplied do not have global dimension coordinates. In
    -        future, to continue concatenating without supplying dimension
    -        coordinates, please use the new `combine_nested` function (or the
    -        `combine='nested'` option to open_mfdataset."""
    -        )
    -
    -    if _requires_concat_and_merge(datasets):
    -        manual_dims = [concat_dim].append(None)
    -        message += dedent(
    -            """\
    -        The datasets supplied require both concatenation and merging. From
    -        xarray version 0.15 this will operation will require either using the
    -        new `combine_nested` function (or the `combine='nested'` option to
    -        open_mfdataset), with a nested list structure such that you can combine
    -        along the dimensions {}. Alternatively if your datasets have global
    -        dimension coordinates then you can use the new `combine_by_coords`
    -        function.""".format(
    -                manual_dims
    -            )
    -        )
    -
    -    warnings.warn(message, FutureWarning, stacklevel=2)
    -
    -    return _old_auto_combine(
    -        datasets,
    -        concat_dim=concat_dim,
    -        compat=compat,
    -        data_vars=data_vars,
    -        coords=coords,
    -        fill_value=fill_value,
    -        join=join,
    -    )
    -
    -
    -def _dimension_coords_exist(datasets):
    -    """
    -    Check if the datasets have consistent global dimension coordinates
    -    which would in future be used by `auto_combine` for concatenation ordering.
    -    """
    -
    -    # Group by data vars
    -    sorted_datasets = sorted(datasets, key=vars_as_keys)
    -    grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -    # Simulates performing the multidimensional combine on each group of data
    -    # variables before merging back together
    -    try:
    -        for vars, datasets_with_same_vars in grouped_by_vars:
    -            _infer_concat_order_from_coords(list(datasets_with_same_vars))
    -        return True
    -    except ValueError:
    -        # ValueError means datasets don't have global dimension coordinates
    -        # Or something else went wrong in trying to determine them
    -        return False
    -
    -
    -def _requires_concat_and_merge(datasets):
    -    """
    -    Check if the datasets require the use of both xarray.concat and
    -    xarray.merge, which in future might require the user to use
    -    `manual_combine` instead.
    -    """
    -    # Group by data vars
    -    sorted_datasets = sorted(datasets, key=vars_as_keys)
    -    grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -    return len(list(grouped_by_vars)) > 1
    -
    -
    -def _old_auto_combine(
    -    datasets,
    -    concat_dim=_CONCAT_DIM_DEFAULT,
    -    compat="no_conflicts",
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -):
    -    if concat_dim is not None:
    -        dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
    -
    -        sorted_datasets = sorted(datasets, key=vars_as_keys)
    -        grouped = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -        concatenated = [
    -            _auto_concat(
    -                list(datasets),
    -                dim=dim,
    -                data_vars=data_vars,
    -                coords=coords,
    -                compat=compat,
    -                fill_value=fill_value,
    -                join=join,
    -            )
    -            for vars, datasets in grouped
    -        ]
    -    else:
    -        concatenated = datasets
    -    merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join)
    -    return merged
    -
    -
    -def _auto_concat(
    -    datasets,
    -    dim=None,
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -    compat="no_conflicts",
    -):
    -    if len(datasets) == 1 and dim is None:
    -        # There is nothing more to combine, so kick out early.
    -        return datasets[0]
    -    else:
    -        if dim is None:
    -            ds0 = datasets[0]
    -            ds1 = datasets[1]
    -            concat_dims = set(ds0.dims)
    -            if ds0.dims != ds1.dims:
    -                dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items())
    -                concat_dims = {i for i, _ in dim_tuples}
    -            if len(concat_dims) > 1:
    -                concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])}
    -            if len(concat_dims) > 1:
    -                raise ValueError(
    -                    "too many different dimensions to " "concatenate: %s" % concat_dims
    -                )
    -            elif len(concat_dims) == 0:
    -                raise ValueError(
    -                    "cannot infer dimension to concatenate: "
    -                    "supply the ``concat_dim`` argument "
    -                    "explicitly"
    -                )
    -            (dim,) = concat_dims
    -        return concat(
    -            datasets,
    -            dim=dim,
    -            data_vars=data_vars,
    -            coords=coords,
    -            fill_value=fill_value,
    -            compat=compat,
    -        )
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 1e33eccb83e..6a840e6303e 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -2977,16 +2977,6 @@ def test_open_mfdataset_auto_combine(self):
                     with open_mfdataset([tmp2, tmp1], combine="by_coords") as actual:
                         assert_identical(original, actual)
     
    -    def test_open_mfdataset_combine_nested_no_concat_dim(self):
    -        original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                original.isel(x=slice(5)).to_netcdf(tmp1)
    -                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
    -
    -                with raises_regex(ValueError, "Must supply concat_dim"):
    -                    open_mfdataset([tmp2, tmp1], combine="nested")
    -
         @pytest.mark.xfail(reason="mfdataset loses encoding currently.")
         def test_encoding_mfdataset(self):
             original = Dataset(
    @@ -3080,6 +3070,15 @@ def test_open_mfdataset_concat_dim_none(self):
                     ) as actual:
                         assert_identical(data, actual)
     
    +    def test_open_mfdataset_concat_dim_default_none(self):
    +        with create_tmp_file() as tmp1:
    +            with create_tmp_file() as tmp2:
    +                data = Dataset({"x": 0})
    +                data.to_netcdf(tmp1)
    +                Dataset({"x": np.nan}).to_netcdf(tmp2)
    +                with open_mfdataset([tmp1, tmp2], combine="nested") as actual:
    +                    assert_identical(data, actual)
    +
         def test_open_dataset(self):
             original = Dataset({"foo": ("x", np.random.randn(10))})
             with create_tmp_file() as tmp:
    @@ -3203,73 +3202,6 @@ def test_load_dataarray(self):
                 ds.to_netcdf(tmp)
     
     
    -@requires_scipy_or_netCDF4
    -@requires_dask
    -class TestOpenMFDataSetDeprecation:
    -    """
    -    Set of tests to check that FutureWarnings are correctly raised until the
    -    deprecation cycle is complete. #2616
    -    """
    -
    -    def test_open_mfdataset_default(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(
    -                    FutureWarning, match="default behaviour of" " `open_mfdataset`"
    -                ):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -    def test_open_mfdataset_with_concat_dim(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="`concat_dim`"):
    -                    open_mfdataset([tmp1, tmp2], concat_dim="x")
    -
    -    def test_auto_combine_with_merge_and_concat(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        ds3 = Dataset({"z": ((), 99)})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                with create_tmp_file() as tmp3:
    -                    ds1.to_netcdf(tmp1)
    -                    ds2.to_netcdf(tmp2)
    -                    ds3.to_netcdf(tmp3)
    -
    -                    with pytest.warns(
    -                        FutureWarning, match="require both concatenation"
    -                    ):
    -                        open_mfdataset([tmp1, tmp2, tmp3])
    -
    -    def test_auto_combine_with_coords(self):
    -        ds1 = Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])})
    -        ds2 = Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="supplied have global"):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -    def test_auto_combine_without_coords(self):
    -        ds1, ds2 = Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="supplied do not have global"):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -
     @requires_scipy_or_netCDF4
     @requires_pydap
     @pytest.mark.filterwarnings("ignore:The binary mode of fromstring is deprecated")
    diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
    index c3f981f10d1..59f61f59722 100644
    --- a/xarray/tests/test_combine.py
    +++ b/xarray/tests/test_combine.py
    @@ -4,14 +4,7 @@
     import numpy as np
     import pytest
     
    -from xarray import (
    -    DataArray,
    -    Dataset,
    -    auto_combine,
    -    combine_by_coords,
    -    combine_nested,
    -    concat,
    -)
    +from xarray import DataArray, Dataset, combine_by_coords, combine_nested, concat
     from xarray.core import dtypes
     from xarray.core.combine import (
         _check_shape_tile_ids,
    @@ -818,173 +811,6 @@ def test_combine_by_coords_incomplete_hypercube(self):
                 combine_by_coords([x1, x2, x3], fill_value=None)
     
     
    -@pytest.mark.filterwarnings(
    -    "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
    -)
    -@pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer")
    -@pytest.mark.filterwarnings("ignore:The datasets supplied")
    -class TestAutoCombineOldAPI:
    -    """
    -    Set of tests which check that old 1-dimensional auto_combine behaviour is
    -    still satisfied. #2616
    -    """
    -
    -    def test_auto_combine(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": [0, 1]})
    -        assert_identical(expected, actual)
    -
    -        actual = auto_combine([actual])
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": [0, 1, 2]})
    -        assert_identical(expected, actual)
    -
    -        # ensure auto_combine handles non-sorted variables
    -        objs = [
    -            Dataset({"x": ("a", [0]), "y": ("a", [0])}),
    -            Dataset({"y": ("a", [1]), "x": ("a", [1])}),
    -        ]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
    -        with raises_regex(ValueError, "too many .* dimensions"):
    -            auto_combine(objs)
    -
    -        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
    -        with raises_regex(ValueError, "cannot infer dimension"):
    -            auto_combine(objs)
    -
    -        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
    -        with raises_regex(ValueError, "'y' is not present in all datasets"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_previously_failed(self):
    -        # In the above scenario, one file is missing, containing the data for
    -        # one year's data for one variable.
    -        datasets = [
    -            Dataset({"a": ("x", [0]), "x": [0]}),
    -            Dataset({"b": ("x", [0]), "x": [0]}),
    -            Dataset({"a": ("x", [1]), "x": [1]}),
    -        ]
    -        expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]})
    -        actual = auto_combine(datasets)
    -        assert_identical(expected, actual)
    -
    -        # Your data includes "time" and "station" dimensions, and each year's
    -        # data has a different set of stations.
    -        datasets = [
    -            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
    -            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
    -        ]
    -        expected = Dataset(
    -            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]}
    -        )
    -        actual = auto_combine(datasets, concat_dim="t")
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_with_new_variables(self):
    -        datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})]
    -        actual = auto_combine(datasets, "y")
    -        expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_no_concat(self):
    -        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": 0, "y": 1})
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": 0, "y": 1, "z": 2})
    -        assert_identical(expected, actual)
    -
    -        data = Dataset({"x": 0})
    -        actual = auto_combine([data, data, data], concat_dim=None)
    -        assert_identical(data, actual)
    -
    -        # Single object, with a concat_dim explicitly provided
    -        # Test the issue reported in GH #1988
    -        objs = [Dataset({"x": 0, "y": 1})]
    -        dim = DataArray([100], name="baz", dims="baz")
    -        actual = auto_combine(objs, concat_dim=dim)
    -        expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
    -        assert_identical(expected, actual)
    -
    -        # Just making sure that auto_combine is doing what is
    -        # expected for non-scalar values, too.
    -        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
    -        dim = DataArray([100], name="baz", dims="baz")
    -        actual = auto_combine(objs, concat_dim=dim)
    -        expected = Dataset(
    -            {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])},
    -            {"baz": [100]},
    -        )
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_order_by_appearance_not_coords(self):
    -        objs = [
    -            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [1])}),
    -            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [0])}),
    -        ]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"foo": ("x", [0, 1])}, coords={"x": ("x", [1, 0])})
    -        assert_identical(expected, actual)
    -
    -    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0])
    -    def test_auto_combine_fill_value(self, fill_value):
    -        datasets = [
    -            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
    -            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
    -        ]
    -        if fill_value == dtypes.NA:
    -            # if we supply the default, we expect the missing value for a
    -            # float array
    -            fill_value = np.nan
    -        expected = Dataset(
    -            {"a": (("t", "x"), [[fill_value, 2, 3], [1, 2, fill_value]])},
    -            {"x": [0, 1, 2]},
    -        )
    -        actual = auto_combine(datasets, concat_dim="t", fill_value=fill_value)
    -        assert_identical(expected, actual)
    -
    -
    -class TestAutoCombineDeprecation:
    -    """
    -    Set of tests to check that FutureWarnings are correctly raised until the
    -    deprecation cycle is complete. #2616
    -    """
    -
    -    def test_auto_combine_with_concat_dim(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    -        with pytest.warns(FutureWarning, match="`concat_dim`"):
    -            auto_combine(objs, concat_dim="x")
    -
    -    def test_auto_combine_with_merge_and_concat(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]}), Dataset({"z": ((), 99)})]
    -        with pytest.warns(FutureWarning, match="require both concatenation"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_with_coords(self):
    -        objs = [
    -            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}),
    -            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}),
    -        ]
    -        with pytest.warns(FutureWarning, match="supplied have global"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_without_coords(self):
    -        objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})]
    -        with pytest.warns(FutureWarning, match="supplied do not have global"):
    -            auto_combine(objs)
    -
    -
     @requires_cftime
     def test_combine_by_coords_distant_cftime_dates():
         # Regression test for https://github.com/pydata/xarray/issues/3535
    
    From 5121d867a50af328353153a3bbc7656c154a602f Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Wed, 24 Jun 2020 20:24:54 +0200
    Subject: [PATCH 065/342] use assert_allclose in the aggregation-with-units
     tests (#4174)
    
    * use assert_allclose in the aggregation tests
    
    * install pint using pip
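
A minimal, hypothetical sketch (not code from this patch) of why the tests
switch to assert_allclose: aggregations accumulate floating-point rounding
error, so an exact comparison can fail even when the result is numerically
correct, while a tolerance-based comparison passes.

    import numpy as np
    import xarray as xr

    # Summation accumulates rounding error, so the result is only close to the
    # hand-computed expectation, not bit-identical to it.
    actual = xr.DataArray(np.array([0.1, 0.2, 0.3])).sum()
    expected = xr.DataArray(np.array(0.6))

    xr.testing.assert_allclose(expected, actual)  # passes: equal within tolerance
    # xr.testing.assert_identical(expected, actual) would raise, because
    # 0.1 + 0.2 + 0.3 evaluates to 0.6000000000000001 in floating point.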
    ---
     ci/requirements/py36-min-nep18.yml    | 3 ++-
     ci/requirements/py36.yml              | 2 +-
     ci/requirements/py37-windows.yml      | 2 +-
     ci/requirements/py37.yml              | 2 +-
     ci/requirements/py38-all-but-dask.yml | 2 +-
     ci/requirements/py38.yml              | 2 +-
     xarray/tests/test_units.py            | 6 +++---
     7 files changed, 10 insertions(+), 9 deletions(-)
    
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index 48b9c057260..cd2b1a18c77 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -11,7 +11,6 @@ dependencies:
       - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - numpy=1.17
       - pandas=0.25
    -  - pint
       - pip
       - pytest
       - pytest-cov
    @@ -19,3 +18,5 @@ dependencies:
       - scipy=1.2
       - setuptools=41.2
       - sparse=0.8
    +  - pip:
    +      - pint==0.13
    diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
    index a500173f277..aa2baf9dcce 100644
    --- a/ci/requirements/py36.yml
    +++ b/ci/requirements/py36.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
    index e9e5c7a900a..8b12704d644 100644
    --- a/ci/requirements/py37-windows.yml
    +++ b/ci/requirements/py37-windows.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
    index dba3926596e..70c453e8776 100644
    --- a/ci/requirements/py37.yml
    +++ b/ci/requirements/py37.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
    index a375d9e1e5a..6d76eecbd6a 100644
    --- a/ci/requirements/py38-all-but-dask.yml
    +++ b/ci/requirements/py38-all-but-dask.yml
    @@ -25,7 +25,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -42,3 +41,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 7dff3a1bd97..6f35138978c 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index b477e8cccb2..fb9063ca49e 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -1438,7 +1438,7 @@ def test_aggregation(self, func, dtype):
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_allclose(expected, actual)
     
         # TODO: remove once pint==0.12 has been released
         @pytest.mark.xfail(
    @@ -2296,7 +2296,7 @@ def test_aggregation(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3861,7 +3861,7 @@ def test_aggregation(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), units)
     
             assert_units_equal(expected, actual)
    -        assert_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize("property", ("imag", "real"))
         def test_numpy_properties(self, property, dtype):
    
    From f4638afe009fde5f53de1a1b80cc71f62593c463 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Wed, 24 Jun 2020 23:59:51 -0400
    Subject: [PATCH 066/342] Correct dask handling for 1D idxmax/min on ND data
     (#4135)
    
    * Correct dask handling for 1D idxmax/min on ND data
    
    * Passing black and others
    
    * Edit Whats New
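
A stand-alone, hypothetical sketch of the indexing pattern the fix relies on
(variable names here are illustrative, not from the patch): indexing a dask
array with a multi-dimensional integer array is not generally supported, so the
N-D index array is flattened, used for 1-D pointwise indexing, and the result
is reshaped back to the index array's shape.

    import numpy as np
    import dask.array as da

    coord = da.from_array(np.arange(10), chunks=5)  # stands in for the dim coordinate
    idx = np.random.randint(0, 10, size=(3, 4))     # stands in for the N-D argmin/argmax indices

    # Flatten, do 1-D fancy indexing, then restore the original index shape.
    picked = coord[idx.ravel()].reshape(idx.shape)
    assert picked.compute().shape == (3, 4)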
    ---
     doc/whats-new.rst              |  4 ++--
     xarray/core/computation.py     |  2 +-
     xarray/tests/test_dataarray.py | 19 +++++++++++++++++++
     3 files changed, 22 insertions(+), 3 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a4ec85c1950..d82be79270e 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -80,8 +80,8 @@ New Features
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
       By `Todd Jennings `_
     - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
    -  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`)
    -  By `Kai Mühlbauer `_.
    +  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`, :pull:`4135`)
    +  By `Kai Mühlbauer `_ and `Pascal Bourgault `_.
     - More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`)
       By `Justus Magin `_.
     - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index cecd4fd8e70..4f4fd475c82 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1563,7 +1563,7 @@ def _calc_idxminmax(
     
             chunks = dict(zip(array.dims, array.chunks))
             dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim])
    -        res = indx.copy(data=dask_coord[(indx.data,)])
    +        res = indx.copy(data=dask_coord[indx.data.ravel()].reshape(indx.shape))
             # we need to attach back the dim name
             res.name = dim
         else:
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index 8fc37ac458d..d942667a4c7 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -5257,6 +5257,25 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
             assert_identical(result7, expected7)
     
     
    +class TestReduceND(TestReduce):
    +    @pytest.mark.parametrize("op", ["idxmin", "idxmax"])
    +    @pytest.mark.parametrize("ndim", [3, 5])
    +    def test_idxminmax_dask(self, op, ndim):
    +        if not has_dask:
    +            pytest.skip("requires dask")
    +
    +        ar0_raw = xr.DataArray(
    +            np.random.random_sample(size=[10] * ndim),
    +            dims=[i for i in "abcdefghij"[: ndim - 1]] + ["x"],
    +            coords={"x": np.arange(10)},
    +            attrs=self.attrs,
    +        )
    +
    +        ar0_dsk = ar0_raw.chunk({})
    +        # Assert idx is the same with dask and without
    +        assert_equal(getattr(ar0_dsk, op)(dim="x"), getattr(ar0_raw, op)(dim="x"))
    +
    +
     @pytest.fixture(params=[1])
     def da(request):
         if request.param == 1:
    
    From 65ca92a5c0a4143d00dd7a822bcb1d49738717f1 Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Wed, 24 Jun 2020 23:20:56 -0700
    Subject: [PATCH 067/342] Add CONTRIBUTING.md for the benefit of GitHub
    
    ---
     CONTRIBUTING.md | 1 +
     1 file changed, 1 insertion(+)
     create mode 100644 CONTRIBUTING.md
    
    diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
    new file mode 100644
    index 00000000000..7a909aefd08
    --- /dev/null
    +++ b/CONTRIBUTING.md
    @@ -0,0 +1 @@
    +Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html)
    
    From 732750a06aef2025b206ba6ff765f5acc53bfa25 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 27 Jun 2020 10:31:11 +0200
    Subject: [PATCH 068/342] Blackdoc (#4177)
    
    * add blackdoc to the pre-commit configuration
    
    * use the stable version of blackdoc
    
    * run blackdoc on all files
    
    * add blackdoc to the linter / formatting tools section
    
    * use language names to enable syntax highlighting
    
    * update whats-new.rst
    ---
     .pre-commit-config.yaml    |  4 +++
     doc/contributing.rst       | 21 ++++++++++-----
     doc/dask.rst               |  5 +++-
     doc/internals.rst          |  9 ++++---
     doc/plotting.rst           |  4 +--
     doc/whats-new.rst          |  3 +++
     xarray/core/computation.py | 52 +++++++++++++++++++++++++-------------
     xarray/core/parallel.py    |  5 +++-
     8 files changed, 71 insertions(+), 32 deletions(-)
    
    diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
    index 1d384e58a3c..447f0007fc2 100644
    --- a/.pre-commit-config.yaml
    +++ b/.pre-commit-config.yaml
    @@ -11,6 +11,10 @@ repos:
         rev: stable
         hooks:
           - id: black
    +  - repo: https://github.com/keewis/blackdoc
    +    rev: stable
    +    hooks:
    +      - id: blackdoc
       - repo: https://gitlab.com/pycqa/flake8
         rev: 3.7.9
         hooks:
    diff --git a/doc/contributing.rst b/doc/contributing.rst
    index 51dba2bb0cc..9e6a3c250e9 100644
    --- a/doc/contributing.rst
    +++ b/doc/contributing.rst
    @@ -148,7 +148,7 @@ We'll now kick off a two-step process:
     1. Install the build dependencies
     2. Build and install xarray
     
    -.. code-block:: none
    +.. code-block:: sh
     
        # Create and activate the build environment
        # This is for Linux and MacOS. On Windows, use py37-windows.yml instead.
    @@ -162,7 +162,10 @@ We'll now kick off a two-step process:
        # Build and install xarray
        pip install -e .
     
    -At this point you should be able to import *xarray* from your locally built version::
    +At this point you should be able to import *xarray* from your locally
    +built version:
    +
    +.. code-block:: sh
     
        $ python  # start an interpreter
        >>> import xarray
    @@ -256,7 +259,9 @@ Some other important things to know about the docs:
     - The tutorials make heavy use of the `ipython directive
       `_ sphinx extension.
       This directive lets you put code in the documentation which will be run
    -  during the doc build. For example::
    +  during the doc build. For example:
    +
    +  .. code:: rst
     
           .. ipython:: python
     
    @@ -290,7 +295,7 @@ Requirements
     Make sure to follow the instructions on :ref:`creating a development environment above `, but
     to build the docs you need to use the environment file ``ci/requirements/doc.yml``.
     
    -.. code-block:: none
    +.. code-block:: sh
     
         # Create and activate the docs environment
         conda env create -f ci/requirements/doc.yml
    @@ -347,7 +352,10 @@ Code Formatting
     
     xarray uses several tools to ensure a consistent code format throughout the project:
     
    -- `Black `_ for standardized code formatting
    +- `Black `_ for standardized
    +  code formatting
    +- `blackdoc `_ for
    +  standardized code formatting in documentation
     - `Flake8 `_ for general code quality
     - `isort `_ for standardized order in imports.
       See also `flake8-isort `_.
    @@ -356,12 +364,13 @@ xarray uses several tools to ensure a consistent code format throughout the proj
     
     ``pip``::
     
    -   pip install black flake8 isort mypy
    +   pip install black flake8 isort mypy blackdoc
     
     and then run from the root of the Xarray repository::
     
        isort -rc .
        black -t py36 .
    +   blackdoc -t py36 .
        flake8
        mypy .
     
    diff --git a/doc/dask.rst b/doc/dask.rst
    index df223982ba4..de25ee2200e 100644
    --- a/doc/dask.rst
    +++ b/doc/dask.rst
    @@ -432,6 +432,7 @@ received by the applied function.
             print(da.sizes)
             return da.time
     
    +
         mapped = xr.map_blocks(func, ds.temperature)
         mapped
     
    @@ -461,9 +462,10 @@ Here is a common example where automated inference will not work.
         :okexcept:
     
         def func(da):
    -	print(da.sizes)
    +        print(da.sizes)
             return da.isel(time=[1])
     
    +
         mapped = xr.map_blocks(func, ds.temperature)
     
     ``func`` cannot be run on 0-shaped inputs because it is not possible to extract element 1 along a
    @@ -501,6 +503,7 @@ Notice that the 0-shaped sizes were not printed to screen. Since ``template`` ha
         def func(obj, a, b=0):
             return obj + a + b
     
    +
         mapped = ds.map_blocks(func, args=[10], kwargs={"b": 10})
         expected = ds + 10 + 10
         mapped.identical(expected)
    diff --git a/doc/internals.rst b/doc/internals.rst
    index 27c7c4e1d87..46c117e312b 100644
    --- a/doc/internals.rst
    +++ b/doc/internals.rst
    @@ -182,9 +182,10 @@ re-open it directly with Zarr:
     
     .. ipython:: python
     
    -    ds = xr.tutorial.load_dataset('rasm')
    -    ds.to_zarr('rasm.zarr', mode='w')
    +    ds = xr.tutorial.load_dataset("rasm")
    +    ds.to_zarr("rasm.zarr", mode="w")
         import zarr
    -    zgroup = zarr.open('rasm.zarr')
    +
    +    zgroup = zarr.open("rasm.zarr")
         print(zgroup.tree())
    -    dict(zgroup['Tair'].attrs)
    +    dict(zgroup["Tair"].attrs)
    \ No newline at end of file
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index f98f47f2567..72248e31b1e 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -220,7 +220,7 @@ from the time and assign it as a non-dimension coordinate:
     
     .. ipython:: python
     
    -    decimal_day = (air1d.time - air1d.time[0]) /  pd.Timedelta('1d')
    +    decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d")
         air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day))
         air1d_multi
     
    @@ -911,4 +911,4 @@ One can also make line plots with multidimensional coordinates. In this case, ``
         f, ax = plt.subplots(2, 1)
         da.plot.line(x="lon", hue="y", ax=ax[0])
         @savefig plotting_example_2d_hue_xy.png
    -    da.plot.line(x="lon", hue="x", ax=ax[1])
    +    da.plot.line(x="lon", hue="x", ax=ax[1])
    \ No newline at end of file
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d82be79270e..27d369dd6f7 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -197,6 +197,9 @@ Internal Changes
     - Run the ``isort`` pre-commit hook only on python source files
       and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`)
       By `Justus Magin `_.
    +- Add `blackdoc `_ to the list of
    +  checkers for development. (:pull:`4177`)
    +  By `Justus Magin `_.
     - Add a CI job that runs the tests with every optional dependency
       except ``dask``. (:issue:`3794`, :pull:`3919`)
       By `Justus Magin `_.
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 4f4fd475c82..d8a0c53e817 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1096,10 +1096,14 @@ def cov(da_a, da_b, dim=None, ddof=1):
     
         Examples
         --------
    -    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a = DataArray(
    +    ...     np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_a
         
         array([[1. , 2. , 3. ],
    @@ -1108,10 +1112,14 @@ def cov(da_a, da_b, dim=None, ddof=1):
         Coordinates:
           * space    (space) >> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b = DataArray(
    +    ...     np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_b
         
         array([[ 0.2,  0.4,  0.6],
    @@ -1123,7 +1131,7 @@ def cov(da_a, da_b, dim=None, ddof=1):
         >>> xr.cov(da_a, da_b)
         
         array(-3.53055556)
    -    >>> xr.cov(da_a, da_b, dim='time')
    +    >>> xr.cov(da_a, da_b, dim="time")
         
         array([ 0.2, -0.5,  1.69333333])
         Coordinates:
    @@ -1165,10 +1173,14 @@ def corr(da_a, da_b, dim=None):
     
         Examples
         --------
    -    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a = DataArray(
    +    ...     np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_a
         
         array([[1. , 2. , 3. ],
    @@ -1177,10 +1189,14 @@ def corr(da_a, da_b, dim=None):
         Coordinates:
           * space    (space) >> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b = DataArray(
    +    ...     np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_b
         
         array([[ 0.2,  0.4,  0.6],
    @@ -1192,7 +1208,7 @@ def corr(da_a, da_b, dim=None):
         >>> xr.corr(da_a, da_b)
         
         array(-0.57087777)
    -    >>> xr.corr(da_a, da_b, dim='time')
    +    >>> xr.corr(da_a, da_b, dim="time")
         
         array([ 1., -1.,  1.])
         Coordinates:
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index 3a77753d0d1..86044e72dd2 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -252,7 +252,10 @@ def map_blocks(
         to the function being applied in ``xr.map_blocks()``:
     
         >>> xr.map_blocks(
    -    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, template=array,
    +    ...     calculate_anomaly,
    +    ...     array,
    +    ...     kwargs={"groupby_type": "time.year"},
    +    ...     template=array,
         ... )
         
         array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    
    From a64cf2d5476e7bbda099b34c40b7be1880dbd39a Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Sun, 28 Jun 2020 10:03:39 -0700
    Subject: [PATCH 069/342] Show data by default in HTML repr for DataArray
     (#4182)
    
    * Show data by default in HTML repr for DataArray
    
    Fixes GH-4176
    
    * add whats new for html repr
    
    * fix test
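
A hypothetical check (not part of this patch, and assuming an xarray version
that includes it) mirroring what the updated test asserts: the data section of
a DataArray's HTML repr is now rendered with the "checked" attribute, i.e.
expanded by default.

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(4), dims="x")
    with xr.set_options(display_style="html"):
        html = da._repr_html_()

    # The data section's checkbox now carries the "checked" attribute.
    assert "class='xr-array-in' type='checkbox' checked>" in html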
    ---
     doc/whats-new.rst                    | 4 +++-
     xarray/core/formatting_html.py       | 2 +-
     xarray/tests/test_formatting_html.py | 4 ++--
     3 files changed, 6 insertions(+), 4 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 27d369dd6f7..c1440ec1108 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -39,7 +39,9 @@ Breaking changes
       the default behaviour of :py:func:`open_mfdataset` has changed to use
       ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`)
       By `Tom Nicholas `_.
    -
    +- The ``DataArray`` and ``Variable`` HTML reprs now expand the data section by
    +  default (:issue:`4176`)
    +  By `Stephan Hoyer `_.
     
     Enhancements
     ~~~~~~~~~~~~
    diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
    index c99683e91c7..400ef61502e 100644
    --- a/xarray/core/formatting_html.py
    +++ b/xarray/core/formatting_html.py
    @@ -184,7 +184,7 @@ def dim_section(obj):
     def array_section(obj):
         # "unique" id to expand/collapse the section
         data_id = "section-" + str(uuid.uuid4())
    -    collapsed = ""
    +    collapsed = "checked"
         variable = getattr(obj, "variable", obj)
         preview = escape(inline_variable_array_repr(variable, max_width=70))
         data_repr = short_data_repr_html(obj)
    diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
    index ea636403318..9a210ad6fa3 100644
    --- a/xarray/tests/test_formatting_html.py
    +++ b/xarray/tests/test_formatting_html.py
    @@ -108,8 +108,8 @@ def test_summarize_attrs_with_unsafe_attr_name_and_value():
     def test_repr_of_dataarray(dataarray):
         formatted = fh.array_repr(dataarray)
         assert "dim_0" in formatted
    -    # has an expandable data section
    -    assert formatted.count("class='xr-array-in' type='checkbox' >") == 1
    +    # has an expanded data section
    +    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
         # coords and attrs don't have an items so they'll be be disabled and collapsed
         assert (
             formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2
    
    From bdcfab524ef1c852abe6dabcfabc7292f058fddc Mon Sep 17 00:00:00 2001
    From: johnomotani 
    Date: Mon, 29 Jun 2020 20:36:24 +0100
    Subject: [PATCH 070/342] Support multiple dimensions in DataArray.argmin() and
     DataArray.argmax() methods (#3936)
    
    * DataArray.indices_min() and DataArray.indices_max() methods
    
    These return dicts of the indices of the minimum or maximum of a
    DataArray over several dimensions.
    
    * Update whats-new.rst and api.rst with indices_min(), indices_max()
    
    * Fix type checking in DataArray._unravel_argminmax()
    
    * Fix expected results for TestReduce3D.test_indices_max()
    
    * Respect global default for keep_attrs
    
    * Merge behaviour of indices_min/indices_max into argmin/argmax
    
When argmin or argmax is called with a sequence for 'dim', it now
returns a dict with the indices for each dimension in dim (see the
usage sketch after this commit message).
    
    * Basic overload of argmin() and argmax() for Dataset
    
If a single dim is passed to Dataset.argmin() or Dataset.argmax(), then
    pass through to _argmin_base or _argmax_base. If a sequence is passed
    for dim, raise an exception, because the result for each DataArray would
    be a dict, which cannot be stored in a Dataset.
    
    * Update Variable and dask tests with _argmin_base, _argmax_base
    
    The basic numpy-style argmin() and argmax() methods were renamed when
    adding support for handling multiple dimensions in DataArray.argmin()
    and DataArray.argmax(). Variable.argmin() and Variable.argmax() are
    therefore renamed as Variable._argmin_base() and
    Variable._argmax_base().
    
    * Update api-hidden.rst with _argmin_base and _argmax_base
    
    * Explicitly defined class methods override injected methods
    
    If a method (such as 'argmin') has been explicitly defined on a class
    (so that hasattr(cls, "argmin")==True), then do not inject that method,
    as it would override the explicitly defined one. Instead inject a
    private method, prefixed by "_injected_" (such as '_injected_argmin'), so
    that the injected method is available to the explicitly defined one.
    
    Do not perform the hasattr check on binary ops, because this breaks
    some operations (e.g. addition between DataArray and int in
    test_dask.py).
    
    * Move StringAccessor back to bottom of DataArray class definition
    
    * Revert use of _argmin_base and _argmax_base
    
    Now not needed because of change to injection in ops.py.
    
    * Move implementation of argmin, argmax from DataArray to Variable
    
This makes the use of argmin and argmax more general (they are available
for Variable) and makes it straightforward for DataArray to wrap the
Variable version.
    
    * Update tests for change to coordinates on result of argmin, argmax
    
    * Add 'out' keyword to argmin/argmax methods - allow numpy call signature
    
    When np.argmin(da) is called, numpy passes an 'out' keyword argument to
    argmin/argmax. Need to allow this argument to avoid errors (but an
    exception is thrown if out is not None).
    
    * Update and correct docstrings for argmin and argmax
    
    * Correct suggested replacement for da.argmin() and da.argmax()
    
    * Remove use of _injected_ methods in argmin/argmax
    
    * Fix typo in name of argminmax_func
    
    Co-Authored-By: keewis 
    
    * Mark argminmax argument to _unravel_argminmax as a string
    
    Co-Authored-By: keewis 
    
    * Hidden internal methods don't need to appear in docs
    
    * Basic docstrings for Dataset.argmin() and Dataset.argmax()
    
    * Set stacklevel for DeprecationWarning in argmin/argmax methods
    
    * Revert "Explicitly defined class methods override injected methods"
    
    This reverts commit 8caf2b8d07c14a2956a26b50ee08d83323c36058.
    
    * Revert "Add 'out' keyword to argmin/argmax methods - allow numpy call signature"
    
    This reverts commit ab480b5c88a059264086260e5090eb38b98aa7fa.
    
    * Remove argmin and argmax from ops.py
    
    * Use self.reduce() in Dataset.argmin() and Dataset.argmax()
    
    Replaces need for "_injected_argmin" and "_injected_argmax".
    
    * Whitespace after 'title' lines in docstrings
    
    * Remove tests of np.argmax() and np.argmin() functions from test_units.py
    
    Applying numpy functions to xarray objects is not necessarily expected
    to work, and the wrapping of argmin() and argmax() is broken by
    xarray-specific interface of argmin() and argmax() methods of Variable,
    DataArray and Dataset.
    
    * Clearer deprecation warnings in Dataset.argmin() and Dataset.argmax()
    
Also, the previously suggested workaround was not correct. Remove the
suggestion, as there is no workaround (but the removed behaviour is
unlikely to be useful).
    
    * Add unravel_index to duck_array_ops, use in Variable._unravel_argminmax
    
    * Filter argmin/argmax DeprecationWarnings in tests
    
    * Correct test for exception for nan in test_argmax
    
    * Remove injected argmin and argmax methods from api-hidden.rst
    
    * flake8 fixes
    
    * Tidy up argmin/argmax following code review
    
    Co-authored-by: Deepak Cherian 
    
    * Remove filters for warnings from argmin/argmax from tests
    
    Pass an explicit axis or dim argument instead to avoid the warning.
    
    * Swap order of reduce_dims checks in Dataset.reduce()
    
    Prefer to pass reduce_dims=None when possible, including for variables
    with only one dimension. Avoids an error if an 'axis' keyword was
    passed.
    
    * revert the changes to Dataset.reduce
    
    * use dim instead of axis
    
    * use dimension instead of Ellipsis
    
    * Make passing 'dim=...' to Dataset.argmin() or Dataset.argmax() an error
    
    * Better docstrings for Dataset.argmin() and Dataset.argmax()
    
    * Update doc/whats-new.rst
    
    Co-authored-by: keewis 
    
    Co-authored-by: Stephan Hoyer 
    Co-authored-by: keewis 
    Co-authored-by: Deepak Cherian 
    Co-authored-by: Keewis 
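
A brief usage sketch of the behaviour described above, based on the docstring
examples added in this patch: passing a sequence of dimensions to argmin/argmax
returns a dict of index arrays that can be fed straight to isel().

    import xarray as xr

    array = xr.DataArray(
        [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, -5, 1], [2, 3, 1]]],
        dims=("x", "y", "z"),
    )

    indices = array.argmin(dim=["x", "z"])  # {"x": <DataArray>, "z": <DataArray>}
    assert array.isel(indices).equals(array.min(dim=("x", "z")))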
    ---
     doc/api-hidden.rst             |  20 -
     doc/whats-new.rst              |   7 +
     xarray/core/dataarray.py       | 203 ++++++++
     xarray/core/dataset.py         | 126 +++++
     xarray/core/duck_array_ops.py  |   1 +
     xarray/core/ops.py             |   2 -
     xarray/core/variable.py        | 172 ++++++-
     xarray/tests/test_dataarray.py | 823 +++++++++++++++++++++++++++++++++
     xarray/tests/test_dataset.py   |   6 +
     xarray/tests/test_units.py     |  97 +++-
     xarray/tests/test_variable.py  |   2 +-
     11 files changed, 1415 insertions(+), 44 deletions(-)
    
    diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
    index 5542e488143..efef4259b74 100644
    --- a/doc/api-hidden.rst
    +++ b/doc/api-hidden.rst
    @@ -41,8 +41,6 @@
     
        core.rolling.DatasetCoarsen.all
        core.rolling.DatasetCoarsen.any
    -   core.rolling.DatasetCoarsen.argmax
    -   core.rolling.DatasetCoarsen.argmin
        core.rolling.DatasetCoarsen.count
        core.rolling.DatasetCoarsen.max
        core.rolling.DatasetCoarsen.mean
    @@ -68,8 +66,6 @@
        core.groupby.DatasetGroupBy.where
        core.groupby.DatasetGroupBy.all
        core.groupby.DatasetGroupBy.any
    -   core.groupby.DatasetGroupBy.argmax
    -   core.groupby.DatasetGroupBy.argmin
        core.groupby.DatasetGroupBy.count
        core.groupby.DatasetGroupBy.max
        core.groupby.DatasetGroupBy.mean
    @@ -85,8 +81,6 @@
        core.resample.DatasetResample.all
        core.resample.DatasetResample.any
        core.resample.DatasetResample.apply
    -   core.resample.DatasetResample.argmax
    -   core.resample.DatasetResample.argmin
        core.resample.DatasetResample.assign
        core.resample.DatasetResample.assign_coords
        core.resample.DatasetResample.bfill
    @@ -110,8 +104,6 @@
        core.resample.DatasetResample.dims
        core.resample.DatasetResample.groups
     
    -   core.rolling.DatasetRolling.argmax
    -   core.rolling.DatasetRolling.argmin
        core.rolling.DatasetRolling.count
        core.rolling.DatasetRolling.max
        core.rolling.DatasetRolling.mean
    @@ -185,8 +177,6 @@
     
        core.rolling.DataArrayCoarsen.all
        core.rolling.DataArrayCoarsen.any
    -   core.rolling.DataArrayCoarsen.argmax
    -   core.rolling.DataArrayCoarsen.argmin
        core.rolling.DataArrayCoarsen.count
        core.rolling.DataArrayCoarsen.max
        core.rolling.DataArrayCoarsen.mean
    @@ -211,8 +201,6 @@
        core.groupby.DataArrayGroupBy.where
        core.groupby.DataArrayGroupBy.all
        core.groupby.DataArrayGroupBy.any
    -   core.groupby.DataArrayGroupBy.argmax
    -   core.groupby.DataArrayGroupBy.argmin
        core.groupby.DataArrayGroupBy.count
        core.groupby.DataArrayGroupBy.max
        core.groupby.DataArrayGroupBy.mean
    @@ -228,8 +216,6 @@
        core.resample.DataArrayResample.all
        core.resample.DataArrayResample.any
        core.resample.DataArrayResample.apply
    -   core.resample.DataArrayResample.argmax
    -   core.resample.DataArrayResample.argmin
        core.resample.DataArrayResample.assign_coords
        core.resample.DataArrayResample.bfill
        core.resample.DataArrayResample.count
    @@ -252,8 +238,6 @@
        core.resample.DataArrayResample.dims
        core.resample.DataArrayResample.groups
     
    -   core.rolling.DataArrayRolling.argmax
    -   core.rolling.DataArrayRolling.argmin
        core.rolling.DataArrayRolling.count
        core.rolling.DataArrayRolling.max
        core.rolling.DataArrayRolling.mean
    @@ -423,8 +407,6 @@
     
        IndexVariable.all
        IndexVariable.any
    -   IndexVariable.argmax
    -   IndexVariable.argmin
        IndexVariable.argsort
        IndexVariable.astype
        IndexVariable.broadcast_equals
    @@ -564,8 +546,6 @@
        CFTimeIndex.all
        CFTimeIndex.any
        CFTimeIndex.append
    -   CFTimeIndex.argmax
    -   CFTimeIndex.argmin
        CFTimeIndex.argsort
        CFTimeIndex.asof
        CFTimeIndex.asof_locs
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index c1440ec1108..086cddee0a0 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -54,6 +54,13 @@ Enhancements
     
     New Features
     ~~~~~~~~~~~~
    +- :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support
    +  sequences of 'dim' arguments, and if a sequence is passed return a dict
    +  (which can be passed to :py:meth:`isel` to get the value of the minimum) of
    +  the indices for each dimension of the minimum or maximum of a DataArray.
    +  (:pull:`3936`)
    +  By `John Omotani `_, thanks to `Keisuke Fujii
    +  `_ for work in :pull:`1469`.
     - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
       By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index b0df874953b..0ce76a5e23a 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -3819,6 +3819,209 @@ def idxmax(
                 keep_attrs=keep_attrs,
             )
     
    +    def argmin(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    +        """Index or indices of the minimum of the DataArray over one or more dimensions.
    +
+        If a sequence is passed to 'dim', then the result is returned as a dict of
+        DataArrays, which can be passed directly to isel(). If a single str is passed
+        to 'dim', then a DataArray with dtype int is returned.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
    +            The dimensions over which to find the minimum. By default, finds minimum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will return a dict with indices for all
    +            dimensions; to return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : DataArray or dict of DataArray
    +
    +        See also
    +        --------
    +        Variable.argmin, DataArray.idxmin
    +
    +        Examples
    +        --------
    +        >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    +        >>> array.min()
    +        
    +        array(-1)
    +        >>> array.argmin()
    +        
    +        array(2)
    +        >>> array.argmin(...)
    +        {'x': 
    +        array(2)}
    +        >>> array.isel(array.argmin(...))
    +        array(-1)
    +
    +        >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]],
    +        ...                       [[1, 3, 2], [2, -5, 1], [2, 3, 1]]],
    +        ...                      dims=("x", "y", "z"))
    +        >>> array.min(dim="x")
    +        
    +        array([[ 1,  2,  1],
    +               [ 2, -5,  1],
    +               [ 2,  1,  1]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmin(dim="x")
    +        
    +        array([[1, 0, 0],
    +               [1, 1, 1],
    +               [0, 0, 1]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmin(dim=["x"])
    +        {'x': 
    +        array([[1, 0, 0],
    +               [1, 1, 1],
    +               [0, 0, 1]])
    +        Dimensions without coordinates: y, z}
    +        >>> array.min(dim=("x", "z"))
    +        
    +        array([ 1, -5,  1])
    +        Dimensions without coordinates: y
    +        >>> array.argmin(dim=["x", "z"])
    +        {'x': 
    +        array([0, 1, 0])
    +        Dimensions without coordinates: y, 'z': 
    +        array([2, 1, 1])
    +        Dimensions without coordinates: y}
    +        >>> array.isel(array.argmin(dim=["x", "z"]))
    +        
    +        array([ 1, -5,  1])
    +        Dimensions without coordinates: y
    +        """
    +        result = self.variable.argmin(dim, axis, keep_attrs, skipna)
    +        if isinstance(result, dict):
    +            return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()}
    +        else:
    +            return self._replace_maybe_drop_dims(result)
    +
    +    def argmax(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    +        """Index or indices of the maximum of the DataArray over one or more dimensions.
    +
+        If a sequence is passed to 'dim', then the result is returned as a dict of
+        DataArrays, which can be passed directly to isel(). If a single str is passed
+        to 'dim', then a DataArray with dtype int is returned.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
    +            The dimensions over which to find the maximum. By default, finds maximum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will return a dict with indices for all
    +            dimensions; to return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
+            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : DataArray or dict of DataArray
    +
    +        See also
    +        --------
    +        Variable.argmax, DataArray.idxmax
    +
    +        Examples
    +        --------
    +        >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    +        >>> array.max()
    +        
    +        array(3)
    +        >>> array.argmax()
    +        
    +        array(3)
    +        >>> array.argmax(...)
    +        {'x': 
    +        array(3)}
    +        >>> array.isel(array.argmax(...))
    +        
    +        array(3)
    +
    +        >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]],
    +        ...                       [[1, 3, 2], [2, 5, 1], [2, 3, 1]]],
    +        ...                      dims=("x", "y", "z"))
    +        >>> array.max(dim="x")
    +        
    +        array([[3, 3, 2],
    +               [3, 5, 2],
    +               [2, 3, 3]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmax(dim="x")
    +        
    +        array([[0, 1, 1],
    +               [0, 1, 0],
    +               [0, 1, 0]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmax(dim=["x"])
    +        {'x': 
    +        array([[0, 1, 1],
    +               [0, 1, 0],
    +               [0, 1, 0]])
    +        Dimensions without coordinates: y, z}
    +        >>> array.max(dim=("x", "z"))
    +        
    +        array([3, 5, 3])
    +        Dimensions without coordinates: y
    +        >>> array.argmax(dim=["x", "z"])
    +        {'x': 
    +        array([0, 1, 0])
    +        Dimensions without coordinates: y, 'z': 
    +        array([0, 1, 2])
    +        Dimensions without coordinates: y}
    +        >>> array.isel(array.argmax(dim=["x", "z"]))
    +        
    +        array([3, 5, 3])
    +        Dimensions without coordinates: y
    +        """
    +        result = self.variable.argmax(dim, axis, keep_attrs, skipna)
    +        if isinstance(result, dict):
    +            return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()}
    +        else:
    +            return self._replace_maybe_drop_dims(result)
    +
         # this needs to be at the end, or mypy will confuse with `str`
         # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
         str = utils.UncachedAccessor(StringAccessor)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index a024324bcb1..b46b1d6dce0 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -6368,5 +6368,131 @@ def idxmax(
                 )
             )
     
    +    def argmin(self, dim=None, axis=None, **kwargs):
    +        """Indices of the minima of the member variables.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : str, optional
+            The dimension over which to find the minimum. By default, finds the minimum
+            over all dimensions; for now this returns an int for backward compatibility,
+            but it is deprecated and will become an error in the future, because
+            DataArray.argmin will then return a dict with indices for all dimensions,
+            which does not make sense for a Dataset.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Dataset
    +
    +        See also
    +        --------
    +        DataArray.argmin
    +
+        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Once the behaviour of DataArray.argmin() and Variable.argmin() with "
    +                "neither dim nor axis argument changes to return a dict of indices of "
    +                "each dimension, for consistency it will be an error to call "
    +                "Dataset.argmin() with no argument, since we don't return a dict of "
    +                "Datasets.",
    +                DeprecationWarning,
    +                stacklevel=2,
    +            )
    +        if (
    +            dim is None
    +            or axis is not None
    +            or (not isinstance(dim, Sequence) and dim is not ...)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            argmin_func = getattr(duck_array_ops, "argmin")
    +            return self.reduce(argmin_func, dim=dim, axis=axis, **kwargs)
    +        else:
    +            raise ValueError(
    +                "When dim is a sequence or ..., DataArray.argmin() returns a dict. "
    +                "dicts cannot be contained in a Dataset, so cannot call "
    +                "Dataset.argmin() with a sequence or ... for dim"
    +            )
    +
    +    def argmax(self, dim=None, axis=None, **kwargs):
    +        """Indices of the maxima of the member variables.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : str, optional
+            The dimension over which to find the maximum. By default, finds the maximum
+            over all dimensions; for now this returns an int for backward compatibility,
+            but it is deprecated and will become an error in the future, because
+            DataArray.argmax will then return a dict with indices for all dimensions,
+            which does not make sense for a Dataset.
    +        axis : int, optional
    +            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Dataset
    +
    +        See also
    +        --------
    +        DataArray.argmax
    +
+        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Once the behaviour of DataArray.argmax() and Variable.argmax() with "
    +                "neither dim nor axis argument changes to return a dict of indices of "
    +                "each dimension, for consistency it will be an error to call "
    +                "Dataset.argmax() with no argument, since we don't return a dict of "
    +                "Datasets.",
    +                DeprecationWarning,
    +                stacklevel=2,
    +            )
    +        if (
    +            dim is None
    +            or axis is not None
    +            or (not isinstance(dim, Sequence) and dim is not ...)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            argmax_func = getattr(duck_array_ops, "argmax")
    +            return self.reduce(argmax_func, dim=dim, axis=axis, **kwargs)
    +        else:
    +            raise ValueError(
    +                "When dim is a sequence or ..., DataArray.argmin() returns a dict. "
    +                "dicts cannot be contained in a Dataset, so cannot call "
    +                "Dataset.argmin() with a sequence or ... for dim"
    +            )
    +
     
     ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)
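A minimal usage sketch (not part of the patch; variable names are illustrative) of the Dataset behaviour described above: a single dimension name reduces each data variable to integer indices, while a sequence or ``...`` raises, because a dict of Datasets cannot be returned:

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [3, 1, 2]), "b": ("x", [0, 5, 4])})
    print(ds.argmin(dim="x"))  # Dataset with a=1 and b=0 (integer indices)
    try:
        ds.argmin(dim=["x"])   # sequences of dimensions are rejected for Datasets
    except ValueError as err:
        print(err)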
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index 76719699168..df579d23544 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -359,6 +359,7 @@ def f(values, axis=None, skipna=None, **kwargs):
     cumprod_1d.numeric_only = True
     cumsum_1d = _create_nan_agg_method("cumsum")
     cumsum_1d.numeric_only = True
    +unravel_index = _dask_or_eager_func("unravel_index")
     
     
     _mean = _create_nan_agg_method("mean")
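The new ``unravel_index`` wrapper dispatches to ``numpy.unravel_index`` (or a dask counterpart for dask-backed data), which converts a flat index back into one index per dimension. A quick numpy-only illustration (not part of the patch):

    import numpy as np

    arr = np.array([[3, 2, 1], [0, 5, 4]])
    flat = arr.argmax()                       # 4, an index into the flattened array
    print(np.unravel_index(flat, arr.shape))  # (1, 1): row 1, column 1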
    diff --git a/xarray/core/ops.py b/xarray/core/ops.py
    index b789f93b4f1..d4aeea37aad 100644
    --- a/xarray/core/ops.py
    +++ b/xarray/core/ops.py
    @@ -47,8 +47,6 @@
     # methods which remove an axis
     REDUCE_METHODS = ["all", "any"]
     NAN_REDUCE_METHODS = [
    -    "argmax",
    -    "argmin",
         "max",
         "min",
         "mean",
    diff --git a/xarray/core/variable.py b/xarray/core/variable.py
    index e19132b1b06..c505c749557 100644
    --- a/xarray/core/variable.py
    +++ b/xarray/core/variable.py
    @@ -6,7 +6,17 @@
     from collections import defaultdict
     from datetime import timedelta
     from distutils.version import LooseVersion
    -from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union
    +from typing import (
    +    Any,
    +    Dict,
    +    Hashable,
    +    Mapping,
    +    Optional,
    +    Sequence,
    +    Tuple,
    +    TypeVar,
    +    Union,
    +)
     
     import numpy as np
     import pandas as pd
    @@ -2069,6 +2079,166 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
             )
             return type(self)(self.dims, numeric_array, self._attrs)
     
    +    def _unravel_argminmax(
    +        self,
    +        argminmax: str,
    +        dim: Union[Hashable, Sequence[Hashable], None],
    +        axis: Union[int, None],
    +        keep_attrs: Optional[bool],
    +        skipna: Optional[bool],
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Apply argmin or argmax over one or more dimensions, returning the result as a
+        dict of Variables that can be passed directly to isel.
    +        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Behaviour of argmin/argmax with neither dim nor axis argument will "
    +                "change to return a dict of indices of each dimension. To get a "
    +                "single, flat index, please use np.argmin(da.data) or "
    +                "np.argmax(da.data) instead of da.argmin() or da.argmax().",
    +                DeprecationWarning,
    +                stacklevel=3,
    +            )
    +
    +        argminmax_func = getattr(duck_array_ops, argminmax)
    +
    +        if dim is ...:
    +            # In future, should do this also when (dim is None and axis is None)
    +            dim = self.dims
    +        if (
    +            dim is None
    +            or axis is not None
    +            or not isinstance(dim, Sequence)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            return self.reduce(
    +                argminmax_func, dim=dim, axis=axis, keep_attrs=keep_attrs, skipna=skipna
    +            )
    +
    +        # Get a name for the new dimension that does not conflict with any existing
    +        # dimension
    +        newdimname = "_unravel_argminmax_dim_0"
    +        count = 1
    +        while newdimname in self.dims:
    +            newdimname = "_unravel_argminmax_dim_{}".format(count)
    +            count += 1
    +
    +        stacked = self.stack({newdimname: dim})
    +
    +        result_dims = stacked.dims[:-1]
    +        reduce_shape = tuple(self.sizes[d] for d in dim)
    +
    +        result_flat_indices = stacked.reduce(argminmax_func, axis=-1, skipna=skipna)
    +
    +        result_unravelled_indices = duck_array_ops.unravel_index(
    +            result_flat_indices.data, reduce_shape
    +        )
    +
    +        result = {
    +            d: Variable(dims=result_dims, data=i)
    +            for d, i in zip(dim, result_unravelled_indices)
    +        }
    +
    +        if keep_attrs is None:
    +            keep_attrs = _get_keep_attrs(default=False)
    +        if keep_attrs:
    +            for v in result.values():
    +                v.attrs = self.attrs
    +
    +        return result
    +
    +    def argmin(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Index or indices of the minimum of the Variable over one or more dimensions.
+        If a sequence is passed to 'dim', then the result is returned as a dict of
+        Variables, which can be passed directly to isel(). If a single str is passed to
+        'dim' then a Variable with dtype int is returned.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
+            The dimensions over which to find the minimum. By default, finds the minimum
+            over all dimensions; for now this returns an int for backward compatibility,
+            but it is deprecated and in the future will return a dict with indices for
+            all dimensions. To return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Variable or dict of Variable
    +
    +        See also
    +        --------
    +        DataArray.argmin, DataArray.idxmin
    +        """
    +        return self._unravel_argminmax("argmin", dim, axis, keep_attrs, skipna)
    +
    +    def argmax(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Index or indices of the maximum of the Variable over one or more dimensions.
+        If a sequence is passed to 'dim', then the result is returned as a dict of
+        Variables, which can be passed directly to isel(). If a single str is passed to
+        'dim' then a Variable with dtype int is returned.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
+            The dimensions over which to find the maximum. By default, finds the maximum
+            over all dimensions; for now this returns an int for backward compatibility,
+            but it is deprecated and in the future will return a dict with indices for
+            all dimensions. To return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
+            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Variable or dict of Variable
    +
    +        See also
    +        --------
    +        DataArray.argmax, DataArray.idxmax
    +        """
    +        return self._unravel_argminmax("argmax", dim, axis, keep_attrs, skipna)
    +
     
     ops.inject_all_ops_and_reduce_methods(Variable)
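A numpy-only sketch (not part of the patch; shapes and names are illustrative) of the strategy ``_unravel_argminmax`` uses when several dimensions are reduced at once: move the reduced dimensions to the end, flatten them into a single axis, take the flat argmin, then unravel it back into one index per reduced dimension:

    import numpy as np

    data = np.arange(2 * 2 * 3).reshape(2, 2, 3)  # dims ("x", "y", "z")
    reduce_axes = (0, 2)                          # reduce over "x" and "z"
    reduce_shape = tuple(data.shape[d] for d in reduce_axes)

    # keep "y", stack "x" and "z" into one trailing axis
    stacked = np.moveaxis(data, reduce_axes, (-2, -1)).reshape(data.shape[1], -1)
    flat = stacked.argmin(axis=-1)
    x_ind, z_ind = np.unravel_index(flat, reduce_shape)
    print(x_ind, z_ind)  # one (x, z) index pair per remaining "y" element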
     
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index d942667a4c7..793090cc122 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -4493,6 +4493,9 @@ def test_max(self, x, minindex, maxindex, nanindex):
     
             assert_identical(result2, expected2)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
         def test_argmin(self, x, minindex, maxindex, nanindex):
             ar = xr.DataArray(
                 x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    @@ -4522,6 +4525,9 @@ def test_argmin(self, x, minindex, maxindex, nanindex):
     
             assert_identical(result2, expected2)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
         def test_argmax(self, x, minindex, maxindex, nanindex):
             ar = xr.DataArray(
                 x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    @@ -4763,6 +4769,78 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
             result7 = ar0.idxmax(fill_value=-1j)
             assert_identical(result7, expected7)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmin_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    +        )
    +        indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])
    +
    +        if np.isnan(minindex):
    +            with pytest.raises(ValueError):
    +                ar.argmin()
    +            return
    +
    +        expected0 = {"x": indarr[minindex]}
    +        result0 = ar.argmin(...)
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmin(..., keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        for da in expected1.values():
    +            da.attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        result2 = ar.argmin(..., skipna=False)
    +        if nanindex is not None and ar.dtype.kind != "O":
    +            expected2 = {"x": indarr.isel(x=nanindex, drop=True)}
    +            expected2["x"].attrs = {}
    +        else:
    +            expected2 = expected0
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmax_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    +        )
    +        indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])
    +
    +        if np.isnan(maxindex):
    +            with pytest.raises(ValueError):
    +                ar.argmax()
    +            return
    +
    +        expected0 = {"x": indarr[maxindex]}
    +        result0 = ar.argmax(...)
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmax(..., keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        for da in expected1.values():
    +            da.attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        result2 = ar.argmax(..., skipna=False)
    +        if nanindex is not None and ar.dtype.kind != "O":
    +            expected2 = {"x": indarr.isel(x=nanindex, drop=True)}
    +            expected2["x"].attrs = {}
    +        else:
    +            expected2 = expected0
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
     
     @pytest.mark.parametrize(
         "x, minindex, maxindex, nanindex",
    @@ -5256,6 +5334,751 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
                 result7 = ar0.idxmax(dim="x", fill_value=-5j)
             assert_identical(result7, expected7)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmin_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x,
    +            dims=["y", "x"],
    +            coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
    +            attrs=self.attrs,
    +        )
    +        indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    +        indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords)
    +
    +        if np.isnan(minindex).any():
    +            with pytest.raises(ValueError):
    +                ar.argmin(dim="x")
    +            return
    +
    +        expected0 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(minindex)
    +        ]
    +        expected0 = {"x": xr.concat(expected0, dim="y")}
    +
    +        result0 = ar.argmin(dim=["x"])
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmin(dim=["x"], keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        expected1["x"].attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        minindex = [
    +            x if y is None or ar.dtype.kind == "O" else y
    +            for x, y in zip(minindex, nanindex)
    +        ]
    +        expected2 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(minindex)
    +        ]
    +        expected2 = {"x": xr.concat(expected2, dim="y")}
    +        expected2["x"].attrs = {}
    +
    +        result2 = ar.argmin(dim=["x"], skipna=False)
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +        result3 = ar.argmin(...)
    +        min_xind = ar.isel(expected0).argmin()
    +        expected3 = {
    +            "y": DataArray(min_xind),
    +            "x": DataArray(minindex[min_xind.item()]),
    +        }
    +
    +        for key in expected3:
    +            assert_identical(result3[key], expected3[key])
    +
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmax_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x,
    +            dims=["y", "x"],
    +            coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
    +            attrs=self.attrs,
    +        )
    +        indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    +        indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords)
    +
    +        if np.isnan(maxindex).any():
    +            with pytest.raises(ValueError):
    +                ar.argmax(dim="x")
    +            return
    +
    +        expected0 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(maxindex)
    +        ]
    +        expected0 = {"x": xr.concat(expected0, dim="y")}
    +
    +        result0 = ar.argmax(dim=["x"])
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmax(dim=["x"], keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        expected1["x"].attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        maxindex = [
    +            x if y is None or ar.dtype.kind == "O" else y
    +            for x, y in zip(maxindex, nanindex)
    +        ]
    +        expected2 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(maxindex)
    +        ]
    +        expected2 = {"x": xr.concat(expected2, dim="y")}
    +        expected2["x"].attrs = {}
    +
    +        result2 = ar.argmax(dim=["x"], skipna=False)
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +        result3 = ar.argmax(...)
    +        max_xind = ar.isel(expected0).argmax()
    +        expected3 = {
    +            "y": DataArray(max_xind),
    +            "x": DataArray(maxindex[max_xind.item()]),
    +        }
    +
    +        for key in expected3:
    +            assert_identical(result3[key], expected3[key])
    +
    +
    +@pytest.mark.parametrize(
    +    "x, minindices_x, minindices_y, minindices_z, minindices_xy, "
    +    "minindices_xz, minindices_yz, minindices_xyz, maxindices_x, "
    +    "maxindices_y, maxindices_z, maxindices_xy, maxindices_xz, maxindices_yz, "
    +    "maxindices_xyz, nanindices_x, nanindices_y, nanindices_z, nanindices_xy, "
    +    "nanindices_xz, nanindices_yz, nanindices_xyz",
    +    [
    +        (
    +            np.array(
    +                [
    +                    [[0, 1, 2, 0], [-2, -4, 2, 0]],
    +                    [[1, 1, 1, 1], [1, 1, 1, 1]],
    +                    [[0, 0, -10, 5], [20, 0, 0, 0]],
    +                ]
    +            ),
    +            {"x": np.array([[0, 2, 2, 0], [0, 0, 2, 0]])},
    +            {"y": np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]])},
    +            {"z": np.array([[0, 1], [0, 0], [2, 1]])},
    +            {"x": np.array([0, 0, 2, 0]), "y": np.array([1, 1, 0, 0])},
    +            {"x": np.array([2, 0]), "z": np.array([2, 1])},
    +            {"y": np.array([1, 0, 0]), "z": np.array([1, 0, 2])},
    +            {"x": np.array(2), "y": np.array(0), "z": np.array(2)},
    +            {"x": np.array([[1, 0, 0, 2], [2, 1, 0, 1]])},
    +            {"y": np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 1, 0]])},
    +            {"z": np.array([[2, 2], [0, 0], [3, 0]])},
    +            {"x": np.array([2, 0, 0, 2]), "y": np.array([1, 0, 0, 0])},
    +            {"x": np.array([2, 2]), "z": np.array([3, 0])},
    +            {"y": np.array([0, 0, 1]), "z": np.array([2, 0, 0])},
    +            {"x": np.array(2), "y": np.array(1), "z": np.array(0)},
    +            {"x": np.array([[None, None, None, None], [None, None, None, None]])},
    +            {
    +                "y": np.array(
    +                    [
    +                        [None, None, None, None],
    +                        [None, None, None, None],
    +                        [None, None, None, None],
    +                    ]
    +                )
    +            },
    +            {"z": np.array([[None, None], [None, None], [None, None]])},
    +            {
    +                "x": np.array([None, None, None, None]),
    +                "y": np.array([None, None, None, None]),
    +            },
    +            {"x": np.array([None, None]), "z": np.array([None, None])},
    +            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
    +            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[np.NaN] * 4, [np.NaN] * 4],
    +                ]
    +            ),
    +            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])},
    +            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
    +            {"x": np.array([1, 0]), "z": np.array([0, 1])},
    +            {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])},
    +            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
    +            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])},
    +            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([0, 0]), "z": np.array([2, 2])},
    +            {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
    +            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
    +            {
    +                "y": np.array(
    +                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
    +                )
    +            },
    +            {"z": np.array([[None, None], [1, None], [0, 0]])},
    +            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([1, 2]), "z": np.array([1, 0])},
    +            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
    +            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[np.NaN] * 4, [np.NaN] * 4],
    +                ]
    +            ).astype("object"),
    +            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])},
    +            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
    +            {"x": np.array([1, 0]), "z": np.array([0, 1])},
    +            {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])},
    +            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
    +            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])},
    +            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([0, 0]), "z": np.array([2, 2])},
    +            {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
    +            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
    +            {
    +                "y": np.array(
    +                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
    +                )
    +            },
    +            {"z": np.array([[None, None], [1, None], [0, 0]])},
    +            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([1, 2]), "z": np.array([1, 0])},
    +            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
    +            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [["2015-12-31", "2020-01-02"], ["2020-01-01", "2016-01-01"]],
    +                    [["2020-01-02", "2020-01-02"], ["2020-01-02", "2020-01-02"]],
    +                    [["1900-01-01", "1-02-03"], ["1900-01-02", "1-02-03"]],
    +                ],
    +                dtype="datetime64[ns]",
    +            ),
    +            {"x": np.array([[2, 2], [2, 2]])},
    +            {"y": np.array([[0, 1], [0, 0], [0, 0]])},
    +            {"z": np.array([[0, 1], [0, 0], [1, 1]])},
    +            {"x": np.array([2, 2]), "y": np.array([0, 0])},
    +            {"x": np.array([2, 2]), "z": np.array([1, 1])},
    +            {"y": np.array([0, 0, 0]), "z": np.array([0, 0, 1])},
    +            {"x": np.array(2), "y": np.array(0), "z": np.array(1)},
    +            {"x": np.array([[1, 0], [1, 1]])},
    +            {"y": np.array([[1, 0], [0, 0], [1, 0]])},
    +            {"z": np.array([[1, 0], [0, 0], [0, 0]])},
    +            {"x": np.array([1, 0]), "y": np.array([0, 0])},
    +            {"x": np.array([0, 1]), "z": np.array([1, 0])},
    +            {"y": np.array([0, 0, 1]), "z": np.array([1, 0, 0])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(1)},
    +            {"x": np.array([[None, None], [None, None]])},
    +            {"y": np.array([[None, None], [None, None], [None, None]])},
    +            {"z": np.array([[None, None], [None, None], [None, None]])},
    +            {"x": np.array([None, None]), "y": np.array([None, None])},
    +            {"x": np.array([None, None]), "z": np.array([None, None])},
    +            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
    +            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
    +        ),
    +    ],
    +)
    +class TestReduce3D(TestReduce):
    +    def test_argmin_dim(
    +        self,
    +        x,
    +        minindices_x,
    +        minindices_y,
    +        minindices_z,
    +        minindices_xy,
    +        minindices_xz,
    +        minindices_yz,
    +        minindices_xyz,
    +        maxindices_x,
    +        maxindices_y,
    +        maxindices_z,
    +        maxindices_xy,
    +        maxindices_xz,
    +        maxindices_yz,
    +        maxindices_xyz,
    +        nanindices_x,
    +        nanindices_y,
    +        nanindices_z,
    +        nanindices_xy,
    +        nanindices_xz,
    +        nanindices_yz,
    +        nanindices_xyz,
    +    ):
    +
    +        ar = xr.DataArray(
    +            x,
    +            dims=["x", "y", "z"],
    +            coords={
    +                "x": np.arange(x.shape[0]) * 4,
    +                "y": 1 - np.arange(x.shape[1]),
    +                "z": 2 + 3 * np.arange(x.shape[2]),
    +            },
    +            attrs=self.attrs,
    +        )
    +        xindarr = np.tile(
    +            np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis],
    +            [1, x.shape[1], x.shape[2]],
    +        )
    +        xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords)
    +        yindarr = np.tile(
    +            np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis],
    +            [x.shape[0], 1, x.shape[2]],
    +        )
    +        yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords)
    +        zindarr = np.tile(
    +            np.arange(x.shape[2], dtype=np.intp)[np.newaxis, np.newaxis, :],
    +            [x.shape[0], x.shape[1], 1],
    +        )
    +        zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords)
    +
    +        for inds in [
    +            minindices_x,
    +            minindices_y,
    +            minindices_z,
    +            minindices_xy,
    +            minindices_xz,
    +            minindices_yz,
    +            minindices_xyz,
    +        ]:
    +            if np.array([np.isnan(i) for i in inds.values()]).any():
    +                with pytest.raises(ValueError):
    +                    ar.argmin(dim=[d for d in inds])
    +                return
    +
    +        result0 = ar.argmin(dim=["x"])
    +        expected0 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in minindices_x.items()
    +        }
    +        for key in expected0:
    +            assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key])
    +
    +        result1 = ar.argmin(dim=["y"])
    +        expected1 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in minindices_y.items()
    +        }
    +        for key in expected1:
    +            assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key])
    +
    +        result2 = ar.argmin(dim=["z"])
    +        expected2 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in minindices_z.items()
    +        }
    +        for key in expected2:
    +            assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key])
    +
    +        result3 = ar.argmin(dim=("x", "y"))
    +        expected3 = {
    +            key: xr.DataArray(value, dims=("z")) for key, value in minindices_xy.items()
    +        }
    +        for key in expected3:
    +            assert_identical(result3[key].drop_vars("z"), expected3[key])
    +
    +        result4 = ar.argmin(dim=("x", "z"))
    +        expected4 = {
    +            key: xr.DataArray(value, dims=("y")) for key, value in minindices_xz.items()
    +        }
    +        for key in expected4:
    +            assert_identical(result4[key].drop_vars("y"), expected4[key])
    +
    +        result5 = ar.argmin(dim=("y", "z"))
    +        expected5 = {
    +            key: xr.DataArray(value, dims=("x")) for key, value in minindices_yz.items()
    +        }
    +        for key in expected5:
    +            assert_identical(result5[key].drop_vars("x"), expected5[key])
    +
    +        result6 = ar.argmin(...)
    +        expected6 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()}
    +        for key in expected6:
    +            assert_identical(result6[key], expected6[key])
    +
    +        minindices_x = {
    +            key: xr.where(
    +                nanindices_x[key] == None,  # noqa: E711
    +                minindices_x[key],
    +                nanindices_x[key],
    +            )
    +            for key in minindices_x
    +        }
    +        expected7 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in minindices_x.items()
    +        }
    +
    +        result7 = ar.argmin(dim=["x"], skipna=False)
    +        for key in expected7:
    +            assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key])
    +
    +        minindices_y = {
    +            key: xr.where(
    +                nanindices_y[key] == None,  # noqa: E711
    +                minindices_y[key],
    +                nanindices_y[key],
    +            )
    +            for key in minindices_y
    +        }
    +        expected8 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in minindices_y.items()
    +        }
    +
    +        result8 = ar.argmin(dim=["y"], skipna=False)
    +        for key in expected8:
    +            assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key])
    +
    +        minindices_z = {
    +            key: xr.where(
    +                nanindices_z[key] == None,  # noqa: E711
    +                minindices_z[key],
    +                nanindices_z[key],
    +            )
    +            for key in minindices_z
    +        }
    +        expected9 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in minindices_z.items()
    +        }
    +
    +        result9 = ar.argmin(dim=["z"], skipna=False)
    +        for key in expected9:
    +            assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key])
    +
    +        minindices_xy = {
    +            key: xr.where(
    +                nanindices_xy[key] == None,  # noqa: E711
    +                minindices_xy[key],
    +                nanindices_xy[key],
    +            )
    +            for key in minindices_xy
    +        }
    +        expected10 = {
    +            key: xr.DataArray(value, dims="z") for key, value in minindices_xy.items()
    +        }
    +
    +        result10 = ar.argmin(dim=("x", "y"), skipna=False)
    +        for key in expected10:
    +            assert_identical(result10[key].drop_vars("z"), expected10[key])
    +
    +        minindices_xz = {
    +            key: xr.where(
    +                nanindices_xz[key] == None,  # noqa: E711
    +                minindices_xz[key],
    +                nanindices_xz[key],
    +            )
    +            for key in minindices_xz
    +        }
    +        expected11 = {
    +            key: xr.DataArray(value, dims="y") for key, value in minindices_xz.items()
    +        }
    +
    +        result11 = ar.argmin(dim=("x", "z"), skipna=False)
    +        for key in expected11:
    +            assert_identical(result11[key].drop_vars("y"), expected11[key])
    +
    +        minindices_yz = {
    +            key: xr.where(
    +                nanindices_yz[key] == None,  # noqa: E711
    +                minindices_yz[key],
    +                nanindices_yz[key],
    +            )
    +            for key in minindices_yz
    +        }
    +        expected12 = {
    +            key: xr.DataArray(value, dims="x") for key, value in minindices_yz.items()
    +        }
    +
    +        result12 = ar.argmin(dim=("y", "z"), skipna=False)
    +        for key in expected12:
    +            assert_identical(result12[key].drop_vars("x"), expected12[key])
    +
    +        minindices_xyz = {
    +            key: xr.where(
    +                nanindices_xyz[key] == None,  # noqa: E711
    +                minindices_xyz[key],
    +                nanindices_xyz[key],
    +            )
    +            for key in minindices_xyz
    +        }
    +        expected13 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()}
    +
    +        result13 = ar.argmin(..., skipna=False)
    +        for key in expected13:
    +            assert_identical(result13[key], expected13[key])
    +
    +    def test_argmax_dim(
    +        self,
    +        x,
    +        minindices_x,
    +        minindices_y,
    +        minindices_z,
    +        minindices_xy,
    +        minindices_xz,
    +        minindices_yz,
    +        minindices_xyz,
    +        maxindices_x,
    +        maxindices_y,
    +        maxindices_z,
    +        maxindices_xy,
    +        maxindices_xz,
    +        maxindices_yz,
    +        maxindices_xyz,
    +        nanindices_x,
    +        nanindices_y,
    +        nanindices_z,
    +        nanindices_xy,
    +        nanindices_xz,
    +        nanindices_yz,
    +        nanindices_xyz,
    +    ):
    +
    +        ar = xr.DataArray(
    +            x,
    +            dims=["x", "y", "z"],
    +            coords={
    +                "x": np.arange(x.shape[0]) * 4,
    +                "y": 1 - np.arange(x.shape[1]),
    +                "z": 2 + 3 * np.arange(x.shape[2]),
    +            },
    +            attrs=self.attrs,
    +        )
    +        xindarr = np.tile(
    +            np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis],
    +            [1, x.shape[1], x.shape[2]],
    +        )
    +        xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords)
    +        yindarr = np.tile(
    +            np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis],
    +            [x.shape[0], 1, x.shape[2]],
    +        )
    +        yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords)
    +        zindarr = np.tile(
    +            np.arange(x.shape[2], dtype=np.intp)[np.newaxis, np.newaxis, :],
    +            [x.shape[0], x.shape[1], 1],
    +        )
    +        zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords)
    +
    +        for inds in [
    +            maxindices_x,
    +            maxindices_y,
    +            maxindices_z,
    +            maxindices_xy,
    +            maxindices_xz,
    +            maxindices_yz,
    +            maxindices_xyz,
    +        ]:
    +            if np.array([np.isnan(i) for i in inds.values()]).any():
    +                with pytest.raises(ValueError):
    +                    ar.argmax(dim=[d for d in inds])
    +                return
    +
    +        result0 = ar.argmax(dim=["x"])
    +        expected0 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in maxindices_x.items()
    +        }
    +        for key in expected0:
    +            assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key])
    +
    +        result1 = ar.argmax(dim=["y"])
    +        expected1 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in maxindices_y.items()
    +        }
    +        for key in expected1:
    +            assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key])
    +
    +        result2 = ar.argmax(dim=["z"])
    +        expected2 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in maxindices_z.items()
    +        }
    +        for key in expected2:
    +            assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key])
    +
    +        result3 = ar.argmax(dim=("x", "y"))
    +        expected3 = {
    +            key: xr.DataArray(value, dims=("z")) for key, value in maxindices_xy.items()
    +        }
    +        for key in expected3:
    +            assert_identical(result3[key].drop_vars("z"), expected3[key])
    +
    +        result4 = ar.argmax(dim=("x", "z"))
    +        expected4 = {
    +            key: xr.DataArray(value, dims=("y")) for key, value in maxindices_xz.items()
    +        }
    +        for key in expected4:
    +            assert_identical(result4[key].drop_vars("y"), expected4[key])
    +
    +        result5 = ar.argmax(dim=("y", "z"))
    +        expected5 = {
    +            key: xr.DataArray(value, dims=("x")) for key, value in maxindices_yz.items()
    +        }
    +        for key in expected5:
    +            assert_identical(result5[key].drop_vars("x"), expected5[key])
    +
    +        result6 = ar.argmax(...)
    +        expected6 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()}
    +        for key in expected6:
    +            assert_identical(result6[key], expected6[key])
    +
    +        maxindices_x = {
    +            key: xr.where(
    +                nanindices_x[key] == None,  # noqa: E711
    +                maxindices_x[key],
    +                nanindices_x[key],
    +            )
    +            for key in maxindices_x
    +        }
    +        expected7 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in maxindices_x.items()
    +        }
    +
    +        result7 = ar.argmax(dim=["x"], skipna=False)
    +        for key in expected7:
    +            assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key])
    +
    +        maxindices_y = {
    +            key: xr.where(
    +                nanindices_y[key] == None,  # noqa: E711
    +                maxindices_y[key],
    +                nanindices_y[key],
    +            )
    +            for key in maxindices_y
    +        }
    +        expected8 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in maxindices_y.items()
    +        }
    +
    +        result8 = ar.argmax(dim=["y"], skipna=False)
    +        for key in expected8:
    +            assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key])
    +
    +        maxindices_z = {
    +            key: xr.where(
    +                nanindices_z[key] == None,  # noqa: E711
    +                maxindices_z[key],
    +                nanindices_z[key],
    +            )
    +            for key in maxindices_z
    +        }
    +        expected9 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in maxindices_z.items()
    +        }
    +
    +        result9 = ar.argmax(dim=["z"], skipna=False)
    +        for key in expected9:
    +            assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key])
    +
    +        maxindices_xy = {
    +            key: xr.where(
    +                nanindices_xy[key] == None,  # noqa: E711
    +                maxindices_xy[key],
    +                nanindices_xy[key],
    +            )
    +            for key in maxindices_xy
    +        }
    +        expected10 = {
    +            key: xr.DataArray(value, dims="z") for key, value in maxindices_xy.items()
    +        }
    +
    +        result10 = ar.argmax(dim=("x", "y"), skipna=False)
    +        for key in expected10:
    +            assert_identical(result10[key].drop_vars("z"), expected10[key])
    +
    +        maxindices_xz = {
    +            key: xr.where(
    +                nanindices_xz[key] == None,  # noqa: E711
    +                maxindices_xz[key],
    +                nanindices_xz[key],
    +            )
    +            for key in maxindices_xz
    +        }
    +        expected11 = {
    +            key: xr.DataArray(value, dims="y") for key, value in maxindices_xz.items()
    +        }
    +
    +        result11 = ar.argmax(dim=("x", "z"), skipna=False)
    +        for key in expected11:
    +            assert_identical(result11[key].drop_vars("y"), expected11[key])
    +
    +        maxindices_yz = {
    +            key: xr.where(
    +                nanindices_yz[key] == None,  # noqa: E711
    +                maxindices_yz[key],
    +                nanindices_yz[key],
    +            )
    +            for key in maxindices_yz
    +        }
    +        expected12 = {
    +            key: xr.DataArray(value, dims="x") for key, value in maxindices_yz.items()
    +        }
    +
    +        result12 = ar.argmax(dim=("y", "z"), skipna=False)
    +        for key in expected12:
    +            assert_identical(result12[key].drop_vars("x"), expected12[key])
    +
    +        maxindices_xyz = {
    +            key: xr.where(
    +                nanindices_xyz[key] == None,  # noqa: E711
    +                maxindices_xyz[key],
    +                nanindices_xyz[key],
    +            )
    +            for key in maxindices_xyz
    +        }
    +        expected13 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()}
    +
    +        result13 = ar.argmax(..., skipna=False)
    +        for key in expected13:
    +            assert_identical(result13[key], expected13[key])
    +
     
     class TestReduceND(TestReduce):
         @pytest.mark.parametrize("op", ["idxmin", "idxmax"])
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 9c8d40724da..0c4082a553e 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -4597,6 +4597,9 @@ def test_reduce_non_numeric(self):
             assert_equal(data1.mean(), data2.mean())
             assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1"))
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    +    )
         def test_reduce_strings(self):
             expected = Dataset({"x": "a"})
             ds = Dataset({"x": ("y", ["a", "b"])})
    @@ -4668,6 +4671,9 @@ def test_reduce_keep_attrs(self):
             for k, v in ds.data_vars.items():
                 assert v.attrs == data[k].attrs
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    +    )
         def test_reduce_argmin(self):
             # regression test for #205
             ds = Dataset({"a": ("x", [0, 1])})
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index fb9063ca49e..20a5f0e8613 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -297,19 +297,29 @@ def __call__(self, obj, *args, **kwargs):
             all_args = merge_args(self.args, args)
             all_kwargs = {**self.kwargs, **kwargs}
     
    +        xarray_classes = (
    +            xr.Variable,
    +            xr.DataArray,
    +            xr.Dataset,
    +            xr.core.groupby.GroupBy,
    +        )
    +
    +        if not isinstance(obj, xarray_classes):
    +            # remove typical xarray args like "dim"
    +            exclude_kwargs = ("dim", "dims")
    +            all_kwargs = {
    +                key: value
    +                for key, value in all_kwargs.items()
    +                if key not in exclude_kwargs
    +            }
    +
             func = getattr(obj, self.name, None)
    +
             if func is None or not isinstance(func, Callable):
                 # fall back to module level numpy functions if not a xarray object
                 if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
                     numpy_func = getattr(np, self.name)
                     func = partial(numpy_func, obj)
    -                # remove typical xarray args like "dim"
    -                exclude_kwargs = ("dim", "dims")
    -                all_kwargs = {
    -                    key: value
    -                    for key, value in all_kwargs.items()
    -                    if key not in exclude_kwargs
    -                }
                 else:
                     raise AttributeError(f"{obj} has no method named '{self.name}'")
     
    @@ -1408,8 +1418,8 @@ def test_real_and_imag(self):
             (
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("argsort"),
                 method("cumprod"),
                 method("cumsum"),
    @@ -1433,7 +1443,11 @@ def test_aggregation(self, func, dtype):
             )
             variable = xr.Variable("x", array)
     
    -        units = extract_units(func(array))
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in func.kwargs:
    +            numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim"))
    +
    +        units = extract_units(func(array, **numpy_kwargs))
             expected = attach_units(func(strip_units(variable)), units)
             actual = func(variable)
     
    @@ -2243,8 +2257,20 @@ def test_repr(self, func, variant, dtype):
             (
                 function("all"),
                 function("any"),
    -            function("argmax"),
    -            function("argmin"),
    +            pytest.param(
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
    +            pytest.param(
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
                 function("max"),
                 function("mean"),
                 pytest.param(
    @@ -2265,8 +2291,8 @@ def test_repr(self, func, variant, dtype):
                 function("cumprod"),
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("mean"),
                 method("median"),
    @@ -2289,6 +2315,10 @@ def test_aggregation(self, func, dtype):
             )
             data_array = xr.DataArray(data=array, dims="x")
     
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in numpy_kwargs:
    +            numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim"))
    +
             # units differ based on the applied function, so we need to
             # first compute the units
             units = extract_units(func(array))
    @@ -3803,8 +3833,20 @@ def test_repr(self, func, variant, dtype):
             (
                 function("all"),
                 function("any"),
    -            function("argmax"),
    -            function("argmin"),
    +            pytest.param(
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
    +            pytest.param(
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
                 function("max"),
                 function("min"),
                 function("mean"),
    @@ -3823,8 +3865,8 @@ def test_repr(self, func, variant, dtype):
                 function("cumprod"),
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("min"),
                 method("mean"),
    @@ -3853,8 +3895,23 @@ def test_aggregation(self, func, dtype):
     
             ds = xr.Dataset({"a": ("x", a), "b": ("x", b)})
     
    -        units_a = array_extract_units(func(a))
    -        units_b = array_extract_units(func(b))
    +        if "dim" in func.kwargs:
    +            numpy_kwargs = func.kwargs.copy()
    +            dim = numpy_kwargs.pop("dim")
    +
    +            axis_a = ds.a.get_axis_num(dim)
    +            axis_b = ds.b.get_axis_num(dim)
    +
    +            numpy_kwargs_a = numpy_kwargs.copy()
    +            numpy_kwargs_a["axis"] = axis_a
    +            numpy_kwargs_b = numpy_kwargs.copy()
    +            numpy_kwargs_b["axis"] = axis_b
    +        else:
    +            numpy_kwargs_a = {}
    +            numpy_kwargs_b = {}
    +
    +        units_a = array_extract_units(func(a, **numpy_kwargs_a))
    +        units_b = array_extract_units(func(b, **numpy_kwargs_b))
             units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
    diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
    index 3003e0d66f3..d79d40d67c0 100644
    --- a/xarray/tests/test_variable.py
    +++ b/xarray/tests/test_variable.py
    @@ -1657,7 +1657,7 @@ def test_reduce_funcs(self):
             assert_identical(v.all(dim="x"), Variable([], False))
     
             v = Variable("t", pd.date_range("2000-01-01", periods=3))
    -        assert v.argmax(skipna=True) == 2
    +        assert v.argmax(skipna=True, dim="t") == 2
     
             assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))
     
    
    From 54b9450b9b9b1805831b2a891dbf7aa321583096 Mon Sep 17 00:00:00 2001
    From: Yohai Bar Sinai <6164157+yohai@users.noreply.github.com>
    Date: Tue, 30 Jun 2020 14:35:20 +0300
    Subject: [PATCH 071/342] fix typo in error message in plot.py (#4188)
    
    ---
     xarray/plot/plot.py | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
    index e4a981daf8c..9081f1adb30 100644
    --- a/xarray/plot/plot.py
    +++ b/xarray/plot/plot.py
    @@ -62,7 +62,7 @@ def _infer_line_data(darray, x, y, hue):
     
         else:
             if x is None and y is None and hue is None:
    -            raise ValueError("For 2D inputs, please" "specify either hue, x or y.")
    +            raise ValueError("For 2D inputs, please specify either hue, x or y.")
     
             if y is None:
                 xname, huename = _infer_xy_labels(darray=darray, x=x, y=hue)
    
    From 5377687473ecb78db085b47f4f5774eb1df93970 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Wed, 1 Jul 2020 20:04:59 +0200
    Subject: [PATCH 072/342] use the latest image of RTD (#4191)
    
    ---
     readthedocs.yml | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/readthedocs.yml b/readthedocs.yml
    index 173d61ec6f3..88aee82a44b 100644
    --- a/readthedocs.yml
    +++ b/readthedocs.yml
    @@ -1,7 +1,7 @@
     version: 2
     
     build:
    -    image: stable
    +    image: latest
     
     conda:
         environment: ci/requirements/doc.yml
    
    From 06c213ead8471520f15ac8378ffe33b36ba4e818 Mon Sep 17 00:00:00 2001
    From: David Brochart 
    Date: Thu, 2 Jul 2020 14:09:38 +0200
    Subject: [PATCH 073/342] Fix typo (#4192)
    
    ---
     doc/related-projects.rst | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/doc/related-projects.rst b/doc/related-projects.rst
    index 9891f1a6bc2..8e8e3f63098 100644
    --- a/doc/related-projects.rst
    +++ b/doc/related-projects.rst
    @@ -76,7 +76,7 @@ Visualization
     - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data.
     - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews.
     - `psyplot `_: Interactive data visualization with python.
    -- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet.
    +- `xarray-leaflet `_: An xarray extension for tiled map plotting based on ipyleaflet.
     
     Non-Python projects
     ~~~~~~~~~~~~~~~~~~~
    
    From e216720ae8168d3e9fb5470cf45f0d382ebc6e20 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Thu, 2 Jul 2020 18:13:59 +0200
    Subject: [PATCH 074/342] More pint compatibility: silence UnitStrippedWarnings
     (#4163)
    
    * globally promote UnitStrippedWarning to errors
    
    * separately test apply_ufunc with units in dims, coords and data
    
    * split the DataArray align test into data, dims and coords tests
    
    * use dtypes instead of python types and use a dtype specific fill value
    
    * rewrite the dataset align tests
    
    * compare with dtypes.NA instead of using np.isnan
    
    * mention the issue in the xfail reason
    
    * make sure the combine_* variants are properly separated from each other
    
    * improve the test case names
    
    * note that broadcast uses align
    
    * properly separate the test cases for concat
    
    * always use the same reason when xfailing units in indexes tests
    
* also check that the replication functions work with units in dims and coords
    
    * apply full_like to the data instead of the variable
    
    * check full_like with units in dims, data and coords separately
    
    * clearly separate the test variants of the merge tests
    
* don't use indexes for the Dataset.where tests
    
    * replace numpy.testing.assert_allclose with assert numpy.allclose
    
    * remove a conditional xfail that depends on a very old pint version
    
    * use assert_identical from the local namespace
    
    * properly separate between the broadcast_like test variants
    
    * don't accept "data" as an alias of the DataArray's data
    
    * properly separate between the variants of the content manipulation tests
    
    * use assert np.allclose(...) instead of np.testing.assert_allclose(...)
    
    * don't test units in indexes in the isel tests
    
    * don't use units in indexes for the head / tail / thin tests
    
    * properly separate the variants of more tests
    
    * rewrite the squeeze tests
    
    * use assert_allclose from the module's namespace
    
    * rewrite the copy tests
    
    * xfail the equal comparison for a pint version lower than 0.14
    
* try to implement a duckarray-friendly assert_array_equal
    
    * add tests for not raising an assertion error
    
    * skip only the dask test if it isn't installed
    
    * also check using pint if available
    
* add a duckarray version of np.testing.assert_allclose (a usage sketch of both helpers follows this list)
    
    * add both to __all__
    
    * make both available in xarray.tests
    
* don't inherit from VariableSubclassobjects since that was not written to test duck arrays.
    
    * test the constant pad mode along with all other modes
    
    * remove most pint version checks, now that pint 0.13 has been released
    
    * use conda to install pint
    
* xfail the DataArray comparison test until the fix in pint's dev version is released
    
    * add tests for the pad method of DataArray and Dataset
    
    * add tests for weighted
    
    * update whats-new.rst
    
    * replace assert np.allclose(...) with assert_duckarray_allclose(...)
    
    * fix the dask fallback
    
    * xfail the pint tests for now since there's a bug in pint
    
    * use utils.is_array_like and utils.is_scalar
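
A minimal usage sketch of the two new helpers (runnable once this patch is
applied; the array values below are purely illustrative):

    import numpy as np
    import xarray as xr

    # element-wise equality check that also accepts duck arrays
    # (e.g. dask or pint arrays) as well as scalar-vs-array comparisons
    xr.testing.assert_duckarray_equal(np.array([0, 2]), np.array([0.0, 2.0]))

    # approximate comparison, analogous to np.testing.assert_allclose
    xr.testing.assert_duckarray_allclose(
        np.array([1.0 + 1e-10, 2.0]), np.array([1.0, 2.0]), rtol=1e-07
    )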
    ---
     ci/requirements/py36-min-nep18.yml    |    3 +-
     ci/requirements/py36.yml              |    2 +-
     ci/requirements/py37-windows.yml      |    2 +-
     ci/requirements/py37.yml              |    2 +-
     ci/requirements/py38-all-but-dask.yml |    2 +-
     ci/requirements/py38.yml              |    2 +-
     doc/whats-new.rst                     |    2 +-
     xarray/core/common.py                 |    2 +-
     xarray/core/utils.py                  |    6 +
     xarray/testing.py                     |   65 +-
     xarray/tests/__init__.py              |    4 +
     xarray/tests/test_testing.py          |   99 ++
     xarray/tests/test_units.py            | 1354 ++++++++++++++-----------
     13 files changed, 919 insertions(+), 626 deletions(-)
    
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index cd2b1a18c77..dd543ce4ddf 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -11,6 +11,7 @@ dependencies:
       - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - numpy=1.17
       - pandas=0.25
    +  - pint=0.13
       - pip
       - pytest
       - pytest-cov
    @@ -18,5 +19,3 @@ dependencies:
       - scipy=1.2
       - setuptools=41.2
       - sparse=0.8
    -  - pip:
    -      - pint==0.13
    diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
    index aa2baf9dcce..a500173f277 100644
    --- a/ci/requirements/py36.yml
    +++ b/ci/requirements/py36.yml
    @@ -28,6 +28,7 @@ dependencies:
       - numba
       - numpy
       - pandas
    +  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -44,4 +45,3 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    -    - pint
    diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
    index 8b12704d644..e9e5c7a900a 100644
    --- a/ci/requirements/py37-windows.yml
    +++ b/ci/requirements/py37-windows.yml
    @@ -28,6 +28,7 @@ dependencies:
       - numba
       - numpy
       - pandas
    +  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -44,4 +45,3 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    -    - pint
    diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
    index 70c453e8776..dba3926596e 100644
    --- a/ci/requirements/py37.yml
    +++ b/ci/requirements/py37.yml
    @@ -28,6 +28,7 @@ dependencies:
       - numba
       - numpy
       - pandas
    +  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -44,4 +45,3 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    -    - pint
    diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
    index 6d76eecbd6a..a375d9e1e5a 100644
    --- a/ci/requirements/py38-all-but-dask.yml
    +++ b/ci/requirements/py38-all-but-dask.yml
    @@ -25,6 +25,7 @@ dependencies:
       - numba
       - numpy
       - pandas
    +  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -41,4 +42,3 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    -    - pint
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 6f35138978c..7dff3a1bd97 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -28,6 +28,7 @@ dependencies:
       - numba
       - numpy
       - pandas
    +  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -44,4 +45,3 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    -    - pint
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 086cddee0a0..e4223f2b4e0 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -91,7 +91,7 @@ New Features
     - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`, :pull:`4135`)
       By `Kai Mühlbauer `_ and `Pascal Bourgault `_.
    -- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`)
    +- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`, :pull:`4163`)
       By `Justus Magin `_.
     - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even
       without ``append_dim``, as long as dimension sizes do not change.
    diff --git a/xarray/core/common.py b/xarray/core/common.py
    index f759f4c32dd..67dc0fda461 100644
    --- a/xarray/core/common.py
    +++ b/xarray/core/common.py
    @@ -1434,7 +1434,7 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None):
                 other.shape, fill_value, dtype=dtype, chunks=other.data.chunks
             )
         else:
    -        data = np.full_like(other, fill_value, dtype=dtype)
    +        data = np.full_like(other.data, fill_value, dtype=dtype)
     
         return Variable(dims=other.dims, data=data, attrs=other.attrs)
     
    diff --git a/xarray/core/utils.py b/xarray/core/utils.py
    index 0542f850b02..668405ba574 100644
    --- a/xarray/core/utils.py
    +++ b/xarray/core/utils.py
    @@ -247,6 +247,12 @@ def is_list_like(value: Any) -> bool:
         return isinstance(value, list) or isinstance(value, tuple)
     
     
    +def is_array_like(value: Any) -> bool:
    +    return (
    +        hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype")
    +    )
    +
    +
     def either_dict_or_kwargs(
         pos_kwargs: Optional[Mapping[Hashable, T]],
         kw_kwargs: Mapping[str, T],
    diff --git a/xarray/testing.py b/xarray/testing.py
    index 9681503414e..ec479ef09d4 100644
    --- a/xarray/testing.py
    +++ b/xarray/testing.py
    @@ -11,7 +11,14 @@
     from xarray.core.indexes import default_indexes
     from xarray.core.variable import IndexVariable, Variable
     
    -__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical")
    +__all__ = (
    +    "assert_allclose",
    +    "assert_chunks_equal",
    +    "assert_duckarray_equal",
    +    "assert_duckarray_allclose",
    +    "assert_equal",
    +    "assert_identical",
    +)
     
     
     def _decode_string_data(data):
    @@ -148,6 +155,62 @@ def compat_variable(a, b):
             raise TypeError("{} not supported by assertion comparison".format(type(a)))
     
     
    +def _format_message(x, y, err_msg, verbose):
    +    diff = x - y
    +    abs_diff = max(abs(diff))
    +    rel_diff = "not implemented"
    +
    +    n_diff = int(np.count_nonzero(diff))
    +    n_total = diff.size
    +
    +    fraction = f"{n_diff} / {n_total}"
    +    percentage = float(n_diff / n_total * 100)
    +
    +    parts = [
    +        "Arrays are not equal",
    +        err_msg,
    +        f"Mismatched elements: {fraction} ({percentage:.0f}%)",
    +        f"Max absolute difference: {abs_diff}",
    +        f"Max relative difference: {rel_diff}",
    +    ]
    +    if verbose:
    +        parts += [
    +            f" x: {x!r}",
    +            f" y: {y!r}",
    +        ]
    +
    +    return "\n".join(parts)
    +
    +
    +def assert_duckarray_allclose(
    +    actual, desired, rtol=1e-07, atol=0, err_msg="", verbose=True
    +):
    +    """ Like `np.testing.assert_allclose`, but for duckarrays. """
    +    __tracebackhide__ = True
    +
    +    allclose = duck_array_ops.allclose_or_equiv(actual, desired, rtol=rtol, atol=atol)
    +    assert allclose, _format_message(actual, desired, err_msg=err_msg, verbose=verbose)
    +
    +
    +def assert_duckarray_equal(x, y, err_msg="", verbose=True):
    +    """ Like `np.testing.assert_array_equal`, but for duckarrays """
    +    __tracebackhide__ = True
    +
    +    if not utils.is_array_like(x) and not utils.is_scalar(x):
    +        x = np.asarray(x)
    +
    +    if not utils.is_array_like(y) and not utils.is_scalar(y):
    +        y = np.asarray(y)
    +
    +    if (utils.is_array_like(x) and utils.is_scalar(y)) or (
    +        utils.is_scalar(x) and utils.is_array_like(y)
    +    ):
    +        equiv = (x == y).all()
    +    else:
    +        equiv = duck_array_ops.array_equiv(x, y)
    +    assert equiv, _format_message(x, y, err_msg=err_msg, verbose=verbose)
    +
    +
     def assert_chunks_equal(a, b):
         """
         Assert that chunksizes along chunked dimensions are equal.
    diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
    index 40c5cfa267c..9021c4e7dbc 100644
    --- a/xarray/tests/__init__.py
    +++ b/xarray/tests/__init__.py
    @@ -16,6 +16,10 @@
     from xarray.core.duck_array_ops import allclose_or_equiv  # noqa: F401
     from xarray.core.indexing import ExplicitlyIndexed
     from xarray.core.options import set_options
    +from xarray.testing import (  # noqa: F401
    +    assert_duckarray_allclose,
    +    assert_duckarray_equal,
    +)
     
     # import mpl and change the backend before other mpl imports
     try:
    diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py
    index f4961af58e9..39ad250246b 100644
    --- a/xarray/tests/test_testing.py
    +++ b/xarray/tests/test_testing.py
    @@ -1,7 +1,31 @@
    +import numpy as np
     import pytest
     
     import xarray as xr
     
    +from . import has_dask
    +
    +try:
    +    from dask.array import from_array as dask_from_array
    +except ImportError:
    +    dask_from_array = lambda x: x
    +
    +try:
    +    import pint
    +
    +    unit_registry = pint.UnitRegistry(force_ndarray_like=True)
    +
    +    def quantity(x):
    +        return unit_registry.Quantity(x, "m")
    +
    +    has_pint = True
    +except ImportError:
    +
    +    def quantity(x):
    +        return x
    +
    +    has_pint = False
    +
     
     def test_allclose_regression():
         x = xr.DataArray(1.01)
    @@ -30,3 +54,78 @@ def test_allclose_regression():
     def test_assert_allclose(obj1, obj2):
         with pytest.raises(AssertionError):
             xr.testing.assert_allclose(obj1, obj2)
    +
    +
    +@pytest.mark.filterwarnings("error")
    +@pytest.mark.parametrize(
    +    "duckarray",
    +    (
    +        pytest.param(np.array, id="numpy"),
    +        pytest.param(
    +            dask_from_array,
    +            id="dask",
    +            marks=pytest.mark.skipif(not has_dask, reason="requires dask"),
    +        ),
    +        pytest.param(
    +            quantity,
    +            id="pint",
    +            marks=[
    +                pytest.mark.skipif(not has_pint, reason="requires pint"),
    +                pytest.mark.xfail(
    +                    reason="inconsistencies in the return value of pint's implementation of eq"
    +                ),
    +            ],
    +        ),
    +    ),
    +)
    +@pytest.mark.parametrize(
    +    ["obj1", "obj2"],
    +    (
    +        pytest.param([1e-10, 2], [0.0, 2.0], id="both arrays"),
    +        pytest.param([1e-17, 2], 0.0, id="second scalar"),
    +        pytest.param(0.0, [1e-17, 2], id="first scalar"),
    +    ),
    +)
    +def test_assert_duckarray_equal_failing(duckarray, obj1, obj2):
    +    # TODO: actually check the repr
    +    a = duckarray(obj1)
    +    b = duckarray(obj2)
    +    with pytest.raises(AssertionError):
    +        xr.testing.assert_duckarray_equal(a, b)
    +
    +
    +@pytest.mark.filterwarnings("error")
    +@pytest.mark.parametrize(
    +    "duckarray",
    +    (
    +        pytest.param(np.array, id="numpy"),
    +        pytest.param(
    +            dask_from_array,
    +            id="dask",
    +            marks=pytest.mark.skipif(not has_dask, reason="requires dask"),
    +        ),
    +        pytest.param(
    +            quantity,
    +            id="pint",
    +            marks=[
    +                pytest.mark.skipif(not has_pint, reason="requires pint"),
    +                pytest.mark.xfail(
    +                    reason="inconsistencies in the return value of pint's implementation of eq"
    +                ),
    +            ],
    +        ),
    +    ),
    +)
    +@pytest.mark.parametrize(
    +    ["obj1", "obj2"],
    +    (
    +        pytest.param([0, 2], [0.0, 2.0], id="both arrays"),
    +        pytest.param([0, 0], 0.0, id="second scalar"),
    +        pytest.param(0.0, [0, 0], id="first scalar"),
    +    ),
    +)
    +def test_assert_duckarray_equal(duckarray, obj1, obj2):
    +    a = duckarray(obj1)
    +    b = duckarray(obj2)
    +
    +    xr.testing.assert_duckarray_equal(a, b)
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index 20a5f0e8613..619fa10116d 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -1,16 +1,16 @@
     import functools
     import operator
    -from distutils.version import LooseVersion
     
     import numpy as np
     import pandas as pd
     import pytest
     
     import xarray as xr
    +from xarray.core import dtypes
     from xarray.core.npcompat import IS_NEP18_ACTIVE
    -from xarray.testing import assert_allclose, assert_equal, assert_identical
     
    -from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects
    +from . import assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical
    +from .test_variable import _PAD_XR_NP_ARGS
     
     pint = pytest.importorskip("pint")
     DimensionalityError = pint.errors.DimensionalityError
    @@ -26,7 +26,7 @@
         pytest.mark.skipif(
             not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled"
         ),
    -    # pytest.mark.filterwarnings("ignore:::pint[.*]"),
    +    pytest.mark.filterwarnings("error::pint.UnitStrippedWarning"),
     ]
     
     
    @@ -180,12 +180,7 @@ def attach_units(obj, units):
             new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs)
         elif isinstance(obj, xr.DataArray):
             # try the array name, "data" and None, then fall back to dimensionless
    -        data_units = (
    -            units.get(obj.name, None)
    -            or units.get("data", None)
    -            or units.get(None, None)
    -            or 1
    -        )
    +        data_units = units.get(obj.name, None) or units.get(None, None) or 1
     
             data = array_attach_units(obj.data, data_units)
     
    @@ -264,7 +259,7 @@ def assert_units_equal(a, b):
         assert extract_units(a) == extract_units(b)
     
     
    -@pytest.fixture(params=[float, int])
    +@pytest.fixture(params=[np.dtype(float), np.dtype(int)], ids=str)
     def dtype(request):
         return request.param
     
    @@ -364,14 +359,31 @@ def __repr__(self):
             return f"function_{self.name}"
     
     
    -def test_apply_ufunc_dataarray(dtype):
    +@pytest.mark.parametrize(
    +    "variant",
    +    (
    +        "data",
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
    +    ),
    +)
    +def test_apply_ufunc_dataarray(variant, dtype):
    +    variants = {
    +        "data": (unit_registry.m, 1, 1),
    +        "dims": (1, unit_registry.m, 1),
    +        "coords": (1, 1, unit_registry.m),
    +    }
    +    data_unit, dim_unit, coord_unit = variants.get(variant)
         func = functools.partial(
             xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
         )
     
    -    array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m
    -    x = np.arange(20) * unit_registry.s
    -    data_array = xr.DataArray(data=array, dims="x", coords={"x": x})
    +    array = np.linspace(0, 10, 20).astype(dtype) * data_unit
    +    x = np.arange(20) * dim_unit
    +    u = np.linspace(-1, 1, 20) * coord_unit
    +    data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)})
     
         expected = attach_units(func(strip_units(data_array)), extract_units(data_array))
         actual = func(data_array)
    @@ -380,20 +392,39 @@ def test_apply_ufunc_dataarray(dtype):
         assert_identical(expected, actual)
     
     
    -def test_apply_ufunc_dataset(dtype):
    +@pytest.mark.parametrize(
    +    "variant",
    +    (
    +        "data",
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
    +    ),
    +)
    +def test_apply_ufunc_dataset(variant, dtype):
    +    variants = {
    +        "data": (unit_registry.m, 1, 1),
    +        "dims": (1, unit_registry.m, 1),
    +        "coords": (1, 1, unit_registry.s),
    +    }
    +    data_unit, dim_unit, coord_unit = variants.get(variant)
    +
         func = functools.partial(
             xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
         )
     
    -    array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -    array2 = np.linspace(0, 10, 5).astype(dtype) * unit_registry.m
    +    array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
    +    array2 = np.linspace(0, 10, 5).astype(dtype) * data_unit
    +
    +    x = np.arange(5) * dim_unit
    +    y = np.arange(10) * dim_unit
     
    -    x = np.arange(5) * unit_registry.s
    -    y = np.arange(10) * unit_registry.m
    +    u = np.linspace(-1, 1, 10) * coord_unit
     
         ds = xr.Dataset(
             data_vars={"a": (("x", "y"), array1), "b": ("x", array2)},
    -        coords={"x": x, "y": y},
    +        coords={"x": x, "y": y, "u": ("y", u)},
         )
     
         expected = attach_units(func(strip_units(ds)), extract_units(ds))
    @@ -403,10 +434,6 @@ def test_apply_ufunc_dataset(dtype):
         assert_identical(expected, actual)
     
     
    -# TODO: remove once pint==0.12 has been released
    -@pytest.mark.xfail(
    -    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -424,44 +451,61 @@ def test_apply_ufunc_dataset(dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    -@pytest.mark.parametrize("fill_value", (10, np.nan))
    -def test_align_dataarray(fill_value, variant, unit, error, dtype):
    +@pytest.mark.parametrize("value", (10, dtypes.NA))
    +def test_align_dataarray(value, variant, unit, error, dtype):
    +    if variant == "coords" and (
    +        value != dtypes.NA or isinstance(unit, unit_registry.Unit)
    +    ):
    +        pytest.xfail(
    +            reason=(
    +                "fill_value is used for both data variables and coords. "
    +                "See https://github.com/pydata/xarray/issues/4165"
    +            )
    +        )
    +
    +    fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value
    +
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
     
    -    array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit
    -    array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit
    -    x = np.arange(2) * original_unit
    +    array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1
    +    array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2
     
    -    y1 = np.arange(5) * original_unit
    -    y2 = np.arange(2, 7) * dim_unit
    -    y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit
    -    y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit
    +    x = np.arange(2) * dim_unit1
    +    y1 = np.arange(5) * dim_unit1
    +    y2 = np.arange(2, 7) * dim_unit2
    +
    +    u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1
    +    u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2
     
         coords1 = {"x": x, "y": y1}
         coords2 = {"x": x, "y": y2}
         if variant == "coords":
    -        coords1["y_a"] = ("y", y_a1)
    -        coords2["y_a"] = ("y", y_a2)
    +        coords1["y_a"] = ("y", u1)
    +        coords2["y_a"] = ("y", u2)
     
         data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y"))
         data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y"))
     
    -    fill_value = fill_value * data_unit
    +    fill_value = fill_value * data_unit2
         func = function(xr.align, join="outer", fill_value=fill_value)
    -    if error is not None and not (
    -        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
    -    ):
    +    if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)):
             with pytest.raises(error):
                 func(data_array1, data_array2)
     
    @@ -469,7 +513,7 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
     
         stripped_kwargs = {
             key: strip_units(
    -            convert_units(value, {None: original_unit if data_unit != 1 else None})
    +            convert_units(value, {None: data_unit1 if data_unit2 != 1 else None})
             )
             for key, value in func.kwargs.items()
         }
    @@ -494,10 +538,6 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         assert_allclose(expected_b, actual_b)
     
     
    -# TODO: remove once pint==0.12 has been released
    -@pytest.mark.xfail(
    -    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -515,45 +555,61 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    -@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan)))
    -def test_align_dataset(fill_value, unit, variant, error, dtype):
    +@pytest.mark.parametrize("value", (10, dtypes.NA))
    +def test_align_dataset(value, unit, variant, error, dtype):
    +    if variant == "coords" and (
    +        value != dtypes.NA or isinstance(unit, unit_registry.Unit)
    +    ):
    +        pytest.xfail(
    +            reason=(
    +                "fill_value is used for both data variables and coords. "
    +                "See https://github.com/pydata/xarray/issues/4165"
    +            )
    +        )
    +
    +    fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value
    +
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
     
    -    array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit
    -    array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit
    +    array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1
    +    array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2
     
    -    x = np.arange(2) * original_unit
    +    x = np.arange(2) * dim_unit1
    +    y1 = np.arange(5) * dim_unit1
    +    y2 = np.arange(2, 7) * dim_unit2
     
    -    y1 = np.arange(5) * original_unit
    -    y2 = np.arange(2, 7) * dim_unit
    -    y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit
    -    y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit
    +    u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1
    +    u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2
     
         coords1 = {"x": x, "y": y1}
         coords2 = {"x": x, "y": y2}
         if variant == "coords":
    -        coords1["y_a"] = ("y", y_a1)
    -        coords2["y_a"] = ("y", y_a2)
    +        coords1["u"] = ("y", u1)
    +        coords2["u"] = ("y", u2)
     
         ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1)
         ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2)
     
    -    fill_value = fill_value * data_unit
    +    fill_value = fill_value * data_unit2
         func = function(xr.align, join="outer", fill_value=fill_value)
    -    if error is not None and not (
    -        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
    -    ):
    +    if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)):
             with pytest.raises(error):
                 func(ds1, ds2)
     
    @@ -561,14 +617,14 @@ def test_align_dataset(fill_value, unit, variant, error, dtype):
     
         stripped_kwargs = {
             key: strip_units(
    -            convert_units(value, {None: original_unit if data_unit != 1 else None})
    +            convert_units(value, {None: data_unit1 if data_unit2 != 1 else None})
             )
             for key, value in func.kwargs.items()
         }
         units_a = extract_units(ds1)
         units_b = extract_units(ds2)
         expected_a, expected_b = func(
    -        strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs
    +        strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs,
         )
         expected_a = attach_units(expected_a, units_a)
         if isinstance(array2, Quantity):
    @@ -585,6 +641,7 @@ def test_align_dataset(fill_value, unit, variant, error, dtype):
     
     
     def test_broadcast_dataarray(dtype):
    +    # uses align internally so more thorough tests are not needed
         array1 = np.linspace(0, 10, 2) * unit_registry.Pa
         array2 = np.linspace(0, 10, 3) * unit_registry.Pa
     
    @@ -606,6 +663,7 @@ def test_broadcast_dataarray(dtype):
     
     
     def test_broadcast_dataset(dtype):
    +    # uses align internally so more thorough tests are not needed
         array1 = np.linspace(0, 10, 2) * unit_registry.Pa
         array2 = np.linspace(0, 10, 3) * unit_registry.Pa
     
    @@ -657,7 +715,9 @@ def test_broadcast_dataset(dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    @@ -665,31 +725,35 @@ def test_combine_by_coords(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    -
    -    array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    -    array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    -    x = np.arange(1, 4) * 10 * original_unit
    -    y = np.arange(2) * original_unit
    -    z = np.arange(3) * original_unit
    -
    -    other_array1 = np.ones_like(array1) * data_unit
    -    other_array2 = np.ones_like(array2) * data_unit
    -    other_x = np.arange(1, 4) * 10 * dim_unit
    -    other_y = np.arange(2, 4) * dim_unit
    -    other_z = np.arange(3, 6) * coord_unit
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
    +
    +    array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
    +    array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
    +    x = np.arange(1, 4) * 10 * dim_unit1
    +    y = np.arange(2) * dim_unit1
    +    u = np.arange(3) * coord_unit1
    +
    +    other_array1 = np.ones_like(array1) * data_unit2
    +    other_array2 = np.ones_like(array2) * data_unit2
    +    other_x = np.arange(1, 4) * 10 * dim_unit2
    +    other_y = np.arange(2, 4) * dim_unit2
    +    other_u = np.arange(3, 6) * coord_unit2
     
         ds = xr.Dataset(
             data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)},
    -        coords={"x": x, "y": y, "z": ("x", z)},
    +        coords={"x": x, "y": y, "u": ("x", u)},
         )
         other = xr.Dataset(
             data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)},
    -        coords={"x": other_x, "y": other_y, "z": ("x", other_z)},
    +        coords={"x": other_x, "y": other_y, "u": ("x", other_u)},
         )
     
         if error is not None:
    @@ -728,7 +792,9 @@ def test_combine_by_coords(variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    @@ -736,18 +802,22 @@ def test_combine_nested(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
     
    -    array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    -    array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    +    array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
    +    array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
     
    -    x = np.arange(1, 4) * 10 * original_unit
    -    y = np.arange(2) * original_unit
    -    z = np.arange(3) * original_unit
    +    x = np.arange(1, 4) * 10 * dim_unit1
    +    y = np.arange(2) * dim_unit1
    +    z = np.arange(3) * coord_unit1
     
         ds1 = xr.Dataset(
             data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)},
    @@ -755,35 +825,35 @@ def test_combine_nested(variant, unit, error, dtype):
         )
         ds2 = xr.Dataset(
             data_vars={
    -            "a": (("y", "x"), np.ones_like(array1) * data_unit),
    -            "b": (("y", "x"), np.ones_like(array2) * data_unit),
    +            "a": (("y", "x"), np.ones_like(array1) * data_unit2),
    +            "b": (("y", "x"), np.ones_like(array2) * data_unit2),
             },
             coords={
    -            "x": np.arange(3) * dim_unit,
    -            "y": np.arange(2, 4) * dim_unit,
    -            "z": ("x", np.arange(-3, 0) * coord_unit),
    +            "x": np.arange(3) * dim_unit2,
    +            "y": np.arange(2, 4) * dim_unit2,
    +            "z": ("x", np.arange(-3, 0) * coord_unit2),
             },
         )
         ds3 = xr.Dataset(
             data_vars={
    -            "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit),
    -            "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit),
    +            "a": (("y", "x"), np.full_like(array1, fill_value=np.nan) * data_unit2),
    +            "b": (("y", "x"), np.full_like(array2, fill_value=np.nan) * data_unit2),
             },
             coords={
    -            "x": np.arange(3, 6) * dim_unit,
    -            "y": np.arange(4, 6) * dim_unit,
    -            "z": ("x", np.arange(3, 6) * coord_unit),
    +            "x": np.arange(3, 6) * dim_unit2,
    +            "y": np.arange(4, 6) * dim_unit2,
    +            "z": ("x", np.arange(3, 6) * coord_unit2),
             },
         )
         ds4 = xr.Dataset(
             data_vars={
    -            "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit),
    -            "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit),
    +            "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit2),
    +            "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit2),
             },
             coords={
    -            "x": np.arange(6, 9) * dim_unit,
    -            "y": np.arange(6, 8) * dim_unit,
    -            "z": ("x", np.arange(6, 9) * coord_unit),
    +            "x": np.arange(6, 9) * dim_unit2,
    +            "y": np.arange(6, 8) * dim_unit2,
    +            "z": ("x", np.arange(6, 9) * coord_unit2),
             },
         )
     
    @@ -828,22 +898,37 @@ def test_combine_nested(variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
         ),
     )
     def test_concat_dataarray(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
    -    variants = {"data": (unit, original_unit), "dims": (original_unit, unit)}
    -    data_unit, dims_unit = variants.get(variant)
    +    variants = {
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
    +    }
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
    +
    +    array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1
    +    array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2
     
    -    array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    -    array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit
    -    x1 = np.arange(5, 15) * original_unit
    -    x2 = np.arange(5) * dims_unit
    +    x1 = np.arange(5, 15) * dim_unit1
    +    x2 = np.arange(5) * dim_unit2
    +
    +    u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1
    +    u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2
     
    -    arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x")
    -    arr2 = xr.DataArray(data=array2, coords={"x": x2}, dims="x")
    +    arr1 = xr.DataArray(data=array1, coords={"x": x1, "u": ("x", u1)}, dims="x")
    +    arr2 = xr.DataArray(data=array2, coords={"x": x2, "u": ("x", u2)}, dims="x")
     
         if error is not None:
             with pytest.raises(error):
    @@ -881,22 +966,37 @@ def test_concat_dataarray(variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
         ),
     )
     def test_concat_dataset(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
    -    variants = {"data": (unit, original_unit), "dims": (original_unit, unit)}
    -    data_unit, dims_unit = variants.get(variant)
    +    variants = {
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
    +    }
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
     
    -    array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    -    array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit
    -    x1 = np.arange(5, 15) * original_unit
    -    x2 = np.arange(5) * dims_unit
    +    array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1
    +    array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2
     
    -    ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1})
    -    ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2})
    +    x1 = np.arange(5, 15) * dim_unit1
    +    x2 = np.arange(5) * dim_unit2
    +
    +    u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1
    +    u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2
    +
    +    ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1, "u": ("x", u1)})
    +    ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2, "u": ("x", u2)})
     
         if error is not None:
             with pytest.raises(error):
    @@ -915,10 +1015,6 @@ def test_concat_dataset(variant, unit, error, dtype):
         assert_identical(expected, actual)
     
     
    -# TODO: remove once pint==0.12 has been released
    -@pytest.mark.xfail(
    -    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -936,7 +1032,9 @@ def test_concat_dataset(variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    @@ -944,29 +1042,33 @@ def test_merge_dataarray(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    -
    -    array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit
    -    x1 = np.arange(2) * original_unit
    -    y1 = np.arange(3) * original_unit
    -    u1 = np.linspace(10, 20, 2) * original_unit
    -    v1 = np.linspace(10, 20, 3) * original_unit
    -
    -    array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit
    -    x2 = np.arange(2, 4) * dim_unit
    -    z2 = np.arange(4) * original_unit
    -    u2 = np.linspace(20, 30, 2) * coord_unit
    -    w2 = np.linspace(10, 20, 4) * original_unit
    -
    -    array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit
    -    y3 = np.arange(3, 6) * dim_unit
    -    z3 = np.arange(4, 8) * dim_unit
    -    v3 = np.linspace(10, 20, 3) * coord_unit
    -    w3 = np.linspace(10, 20, 4) * coord_unit
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
    +
    +    array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit1
    +    x1 = np.arange(2) * dim_unit1
    +    y1 = np.arange(3) * dim_unit1
    +    u1 = np.linspace(10, 20, 2) * coord_unit1
    +    v1 = np.linspace(10, 20, 3) * coord_unit1
    +
    +    array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit2
    +    x2 = np.arange(2, 4) * dim_unit2
    +    z2 = np.arange(4) * dim_unit1
    +    u2 = np.linspace(20, 30, 2) * coord_unit2
    +    w2 = np.linspace(10, 20, 4) * coord_unit1
    +
    +    array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit2
    +    y3 = np.arange(3, 6) * dim_unit2
    +    z3 = np.arange(4, 8) * dim_unit2
    +    v3 = np.linspace(10, 20, 3) * coord_unit2
    +    w3 = np.linspace(10, 20, 4) * coord_unit2
     
         arr1 = xr.DataArray(
             name="a",
    @@ -993,31 +1095,22 @@ def test_merge_dataarray(variant, unit, error, dtype):
     
             return
     
    -    units = {name: original_unit for name in list("axyzuvw")}
    -
    -    convert_and_strip = lambda arr: strip_units(convert_units(arr, units))
    -    expected_units = {
    -        "a": original_unit,
    -        "u": original_unit,
    -        "v": original_unit,
    -        "w": original_unit,
    -        "x": original_unit,
    -        "y": original_unit,
    -        "z": original_unit,
    +    units = {
    +        "a": data_unit1,
    +        "u": coord_unit1,
    +        "v": coord_unit1,
    +        "w": coord_unit1,
    +        "x": dim_unit1,
    +        "y": dim_unit1,
    +        "z": dim_unit1,
         }
    +    convert_and_strip = lambda arr: strip_units(convert_units(arr, units))
     
    -    expected = convert_units(
    -        attach_units(
    -            xr.merge(
    -                [
    -                    convert_and_strip(arr1),
    -                    convert_and_strip(arr2),
    -                    convert_and_strip(arr3),
    -                ]
    -            ),
    -            units,
    +    expected = attach_units(
    +        xr.merge(
    +            [convert_and_strip(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]
             ),
    -        expected_units,
    +        units,
         )
     
         actual = xr.merge([arr1, arr2, arr3])
    @@ -1026,10 +1119,6 @@ def test_merge_dataarray(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    -# TODO: remove once pint==0.12 has been released
    -@pytest.mark.xfail(
    -    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1047,7 +1136,9 @@ def test_merge_dataarray(variant, unit, error, dtype):
         "variant",
         (
             "data",
    -        pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
             "coords",
         ),
     )
    @@ -1055,43 +1146,47 @@ def test_merge_dataset(variant, unit, error, dtype):
         original_unit = unit_registry.m
     
         variants = {
    -        "data": (unit, original_unit, original_unit),
    -        "dims": (original_unit, unit, original_unit),
    -        "coords": (original_unit, original_unit, unit),
    +        "data": ((original_unit, unit), (1, 1), (1, 1)),
    +        "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +        "coords": ((1, 1), (1, 1), (original_unit, unit)),
         }
    -    data_unit, dim_unit, coord_unit = variants.get(variant)
    +    (
    +        (data_unit1, data_unit2),
    +        (dim_unit1, dim_unit2),
    +        (coord_unit1, coord_unit2),
    +    ) = variants.get(variant)
     
    -    array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    -    array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit
    +    array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
    +    array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1
     
    -    x = np.arange(11, 14) * original_unit
    -    y = np.arange(2) * original_unit
    -    z = np.arange(3) * original_unit
    +    x = np.arange(11, 14) * dim_unit1
    +    y = np.arange(2) * dim_unit1
    +    u = np.arange(3) * coord_unit1
     
         ds1 = xr.Dataset(
             data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)},
    -        coords={"x": x, "y": y, "u": ("x", z)},
    +        coords={"x": x, "y": y, "u": ("x", u)},
         )
         ds2 = xr.Dataset(
             data_vars={
    -            "a": (("y", "x"), np.ones_like(array1) * data_unit),
    -            "b": (("y", "x"), np.ones_like(array2) * data_unit),
    +            "a": (("y", "x"), np.ones_like(array1) * data_unit2),
    +            "b": (("y", "x"), np.ones_like(array2) * data_unit2),
             },
             coords={
    -            "x": np.arange(3) * dim_unit,
    -            "y": np.arange(2, 4) * dim_unit,
    -            "u": ("x", np.arange(-3, 0) * coord_unit),
    +            "x": np.arange(3) * dim_unit2,
    +            "y": np.arange(2, 4) * dim_unit2,
    +            "u": ("x", np.arange(-3, 0) * coord_unit2),
             },
         )
         ds3 = xr.Dataset(
             data_vars={
    -            "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit),
    -            "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit),
    +            "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit2),
    +            "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit2),
             },
             coords={
    -            "x": np.arange(3, 6) * dim_unit,
    -            "y": np.arange(4, 6) * dim_unit,
    -            "u": ("x", np.arange(3, 6) * coord_unit),
    +            "x": np.arange(3, 6) * dim_unit2,
    +            "y": np.arange(4, 6) * dim_unit2,
    +            "u": ("x", np.arange(3, 6) * coord_unit2),
             },
         )
     
    @@ -1104,15 +1199,9 @@ def test_merge_dataset(variant, unit, error, dtype):
     
         units = extract_units(ds1)
         convert_and_strip = lambda ds: strip_units(convert_units(ds, units))
    -    expected_units = {name: original_unit for name in list("abxyzu")}
    -    expected = convert_units(
    -        attach_units(
    -            func(
    -                [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]
    -            ),
    -            units,
    -        ),
    -        expected_units,
    +    expected = attach_units(
    +        func([convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]),
    +        units,
         )
         actual = func([ds1, ds2, ds3])
     
    @@ -1120,35 +1209,79 @@ def test_merge_dataset(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    +@pytest.mark.parametrize(
    +    "variant",
    +    (
    +        "data",
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
    +    ),
    +)
     @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
    -def test_replication_dataarray(func, dtype):
    -    array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s
    -    data_array = xr.DataArray(data=array, dims="x")
    +def test_replication_dataarray(func, variant, dtype):
    +    unit = unit_registry.m
    +
    +    variants = {
    +        "data": (unit, 1, 1),
    +        "dims": (1, unit, 1),
    +        "coords": (1, 1, unit),
    +    }
    +    data_unit, dim_unit, coord_unit = variants.get(variant)
     
    -    numpy_func = getattr(np, func.__name__)
    -    units = extract_units(numpy_func(data_array))
    -    expected = attach_units(func(data_array), units)
    +    array = np.linspace(0, 10, 20).astype(dtype) * data_unit
    +    x = np.arange(20) * dim_unit
    +    u = np.linspace(0, 1, 20) * coord_unit
    +
    +    data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)})
    +    units = extract_units(data_array)
    +    units.pop(data_array.name)
    +
    +    expected = attach_units(func(strip_units(data_array)), units)
         actual = func(data_array)
     
         assert_units_equal(expected, actual)
         assert_identical(expected, actual)
     
     
    +@pytest.mark.parametrize(
    +    "variant",
    +    (
    +        "data",
    +        pytest.param(
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        "coords",
    +    ),
    +)
     @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
    -def test_replication_dataset(func, dtype):
    -    array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s
    -    array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa
    -    x = np.arange(20).astype(dtype) * unit_registry.m
    -    y = np.arange(10).astype(dtype) * unit_registry.m
    -    z = y.to(unit_registry.mm)
    +def test_replication_dataset(func, variant, dtype):
    +    unit = unit_registry.m
    +
    +    variants = {
    +        "data": ((unit_registry.m, unit_registry.Pa), 1, 1),
    +        "dims": ((1, 1), unit, 1),
    +        "coords": ((1, 1), 1, unit),
    +    }
    +    (data_unit1, data_unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +    array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1
    +    array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2
    +    x = np.arange(20).astype(dtype) * dim_unit
    +    y = np.arange(10).astype(dtype) * dim_unit
    +    u = np.linspace(0, 1, 10) * coord_unit
     
         ds = xr.Dataset(
             data_vars={"a": ("x", array1), "b": ("y", array2)},
    -        coords={"x": x, "y": y, "z": ("y", z)},
    +        coords={"x": x, "y": y, "u": ("y", u)},
         )
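+    # expected units: the coordinates keep theirs, the data variables have none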
    +    units = {
    +        name: unit
    +        for name, unit in extract_units(ds).items()
    +        if name not in ds.data_vars
    +    }
     
    -    numpy_func = getattr(np, func.__name__)
    -    units = extract_units(ds.map(numpy_func))
         expected = attach_units(func(strip_units(ds)), units)
     
         actual = func(ds)
    @@ -1157,37 +1290,40 @@ def test_replication_dataset(func, dtype):
         assert_identical(expected, actual)
     
     
    -@pytest.mark.xfail(
    -    reason=(
    -        "pint is undecided on how `full_like` should work, so incorrect errors "
    -        "may be expected: hgrecco/pint#882"
    -    )
    -)
     @pytest.mark.parametrize(
    -    "unit,error",
    +    "variant",
         (
    -        pytest.param(1, DimensionalityError, id="no_unit"),
    +        "data",
             pytest.param(
    -            unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        pytest.param(
    +            "coords",
    +            marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"),
             ),
    -        pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"),
    -        pytest.param(unit_registry.ms, None, id="compatible_unit"),
    -        pytest.param(unit_registry.s, None, id="identical_unit"),
         ),
    -    ids=repr,
     )
    -def test_replication_full_like_dataarray(unit, error, dtype):
    -    array = np.linspace(0, 5, 10) * unit_registry.s
    -    data_array = xr.DataArray(data=array, dims="x")
    +def test_replication_full_like_dataarray(variant, dtype):
    +    # since full_like will strip units and then use the units of the
    +    # fill value, we don't need to try multiple units
    +    unit = unit_registry.m
     
    -    fill_value = -1 * unit
    -    if error is not None:
    -        with pytest.raises(error):
    -            xr.full_like(data_array, fill_value=fill_value)
    +    variants = {
    +        "data": (unit, 1, 1),
    +        "dims": (1, unit, 1),
    +        "coords": (1, 1, unit),
    +    }
    +    data_unit, dim_unit, coord_unit = variants.get(variant)
     
    -        return
    +    array = np.linspace(0, 5, 10) * data_unit
    +    x = np.arange(10) * dim_unit
    +    u = np.linspace(0, 1, 10) * coord_unit
    +    data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)})
    +
    +    fill_value = -1 * unit_registry.degK
     
    -    units = {**extract_units(data_array), **{None: unit if unit != 1 else None}}
    +    units = extract_units(data_array)
    +    units[data_array.name] = fill_value.units
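+    # i.e. the coords keep their units while the data takes the fill value's unit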
         expected = attach_units(
             xr.full_like(strip_units(data_array), fill_value=strip_units(fill_value)), units
         )
    @@ -1197,47 +1333,46 @@ def test_replication_full_like_dataarray(unit, error, dtype):
         assert_identical(expected, actual)
     
     
    -@pytest.mark.xfail(
    -    reason=(
    -        "pint is undecided on how `full_like` should work, so incorrect errors "
    -        "may be expected: hgrecco/pint#882"
    -    )
    -)
     @pytest.mark.parametrize(
    -    "unit,error",
    +    "variant",
         (
    -        pytest.param(1, DimensionalityError, id="no_unit"),
    +        "data",
             pytest.param(
    -            unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +            "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +        ),
    +        pytest.param(
    +            "coords",
    +            marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"),
             ),
    -        pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"),
    -        pytest.param(unit_registry.ms, None, id="compatible_unit"),
    -        pytest.param(unit_registry.s, None, id="identical_unit"),
         ),
    -    ids=repr,
     )
    -def test_replication_full_like_dataset(unit, error, dtype):
    -    array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s
    -    array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa
    -    x = np.arange(20).astype(dtype) * unit_registry.m
    -    y = np.arange(10).astype(dtype) * unit_registry.m
    -    z = y.to(unit_registry.mm)
    +def test_replication_full_like_dataset(variant, dtype):
    +    unit = unit_registry.m
    +
    +    variants = {
    +        "data": ((unit_registry.s, unit_registry.Pa), 1, 1),
    +        "dims": ((1, 1), unit, 1),
    +        "coords": ((1, 1), 1, unit),
    +    }
    +    (data_unit1, data_unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +    array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1
    +    array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2
    +    x = np.arange(20).astype(dtype) * dim_unit
    +    y = np.arange(10).astype(dtype) * dim_unit
    +
    +    u = np.linspace(0, 1, 10) * coord_unit
     
         ds = xr.Dataset(
             data_vars={"a": ("x", array1), "b": ("y", array2)},
    -        coords={"x": x, "y": y, "z": ("y", z)},
    +        coords={"x": x, "y": y, "u": ("y", u)},
         )
     
    -    fill_value = -1 * unit
    -    if error is not None:
    -        with pytest.raises(error):
    -            xr.full_like(ds, fill_value=fill_value)
    -
    -        return
    +    fill_value = -1 * unit_registry.degK
     
         units = {
             **extract_units(ds),
    -        **{name: unit if unit != 1 else None for name in ds.data_vars},
    +        **{name: unit_registry.degK for name in ds.data_vars},
         }
         expected = attach_units(
             xr.full_like(strip_units(ds), fill_value=strip_units(fill_value)), units
    @@ -1308,10 +1443,9 @@ def test_where_dataarray(fill_value, unit, error, dtype):
     def test_where_dataset(fill_value, unit, error, dtype):
         array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
         array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m
    -    x = np.arange(10) * unit_registry.s
     
    -    ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
    -    cond = x < 5 * unit_registry.s
    +    ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)})
    +    cond = array1 < 2 * unit_registry.m
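+    # "x" has no coordinate here, so build the condition from the data itself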
         fill_value = fill_value * unit
     
         if error is not None and not (
    @@ -1358,61 +1492,7 @@ def test_dot_dataarray(dtype):
         assert_identical(expected, actual)
     
     
    -def delete_attrs(*to_delete):
    -    def wrapper(cls):
    -        for item in to_delete:
    -            setattr(cls, item, None)
    -
    -        return cls
    -
    -    return wrapper
    -
    -
    -@delete_attrs(
    -    "test_getitem_with_mask",
    -    "test_getitem_with_mask_nd_indexer",
    -    "test_index_0d_string",
    -    "test_index_0d_datetime",
    -    "test_index_0d_timedelta64",
    -    "test_0d_time_data",
    -    "test_index_0d_not_a_time",
    -    "test_datetime64_conversion",
    -    "test_timedelta64_conversion",
    -    "test_pandas_period_index",
    -    "test_1d_reduce",
    -    "test_array_interface",
    -    "test___array__",
    -    "test_copy_index",
    -    "test_concat_number_strings",
    -    "test_concat_fixed_len_str",
    -    "test_concat_mixed_dtypes",
    -    "test_pandas_datetime64_with_tz",
    -    "test_pandas_data",
    -    "test_multiindex",
    -)
    -class TestVariable(VariableSubclassobjects):
    -    @staticmethod
    -    def cls(dims, data, *args, **kwargs):
    -        return xr.Variable(
    -            dims, unit_registry.Quantity(data, unit_registry.m), *args, **kwargs
    -        )
    -
    -    def example_1d_objects(self):
    -        for data in [
    -            range(3),
    -            0.5 * np.arange(3),
    -            0.5 * np.arange(3, dtype=np.float32),
    -            np.array(["a", "b", "c"], dtype=object),
    -        ]:
    -            yield (self.cls("x", data), data)
    -
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
    -    def test_real_and_imag(self):
    -        super().test_real_and_imag()
    -
    +class TestVariable:
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1454,22 +1534,14 @@ def test_aggregation(self, func, dtype):
             assert_units_equal(expected, actual)
             assert_allclose(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
         def test_aggregate_complex(self):
             variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m)
             expected = xr.Variable((), (0.5 + 1j) * unit_registry.m)
             actual = variable.mean()
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1526,7 +1598,7 @@ def test_numpy_methods(self, func, unit, error, dtype):
             actual = func(variable, *args, **kwargs)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("item", 5), method("searchsorted", 5)), ids=repr
    @@ -1586,7 +1658,7 @@ def test_raw_numpy_methods(self, func, unit, error, dtype):
             actual = func(variable, *args, **kwargs)
     
             assert_units_equal(expected, actual)
    -        np.testing.assert_allclose(expected, actual)
    +        assert_duckarray_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -1609,7 +1681,7 @@ def test_missing_value_detection(self, func):
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -1655,7 +1727,7 @@ def test_missing_value_fillna(self, unit, error):
             actual = variable.fillna(value=fill_value)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -1766,12 +1838,8 @@ def test_isel(self, indices, dtype):
             actual = variable.isel(x=indices)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -1828,7 +1896,7 @@ def test_1d_math(self, func, unit, error, dtype):
             actual = func(variable, y)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -1877,43 +1945,30 @@ def test_masking(self, func, unit, error, dtype):
             actual = func(variable, cond, other)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    def test_squeeze(self, dtype):
    +    @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all"))
    +    def test_squeeze(self, dim, dtype):
             shape = (2, 1, 3, 1, 1, 2)
             names = list("abcdef")
    +        dim_lengths = dict(zip(names, shape))
             array = np.ones(shape=shape) * unit_registry.m
             variable = xr.Variable(names, array)
     
    +        kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {}
             expected = attach_units(
    -            strip_units(variable).squeeze(), extract_units(variable)
    +            strip_units(variable).squeeze(**kwargs), extract_units(variable)
             )
    -        actual = variable.squeeze()
    +        actual = variable.squeeze(**kwargs)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    -
    -        names = tuple(name for name, size in zip(names, shape) if shape == 1)
    -        for name in names:
    -            expected = attach_units(
    -                strip_units(variable).squeeze(dim=name), extract_units(variable)
    -            )
    -            actual = variable.squeeze(dim=name)
    -
    -            assert_units_equal(expected, actual)
    -            xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("coarsen", windows={"y": 2}, func=np.mean),
    -            pytest.param(
    -                method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="quantile / nanquantile not implemented yet",
    -                ),
    -            ),
    +            method("quantile", q=[0.25, 0.75]),
                 pytest.param(
                     method("rank", dim="x"),
                     marks=pytest.mark.xfail(reason="rank not implemented for non-ndarray"),
    @@ -1940,7 +1995,7 @@ def test_computation(self, func, dtype):
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -1986,7 +2041,7 @@ def test_stack(self, dtype):
             actual = variable.stack(z=("x", "y"))
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         def test_unstack(self, dtype):
             array = np.linspace(0, 5, 3 * 10).astype(dtype) * unit_registry.m
    @@ -1998,7 +2053,7 @@ def test_unstack(self, dtype):
             actual = variable.unstack(z={"x": 3, "y": 10})
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2038,7 +2093,7 @@ def test_concat(self, unit, error, dtype):
             actual = xr.Variable.concat([variable, other], dim="y")
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         def test_set_dims(self, dtype):
             array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m
    @@ -2051,7 +2106,7 @@ def test_set_dims(self, dtype):
             actual = variable.set_dims(dims)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         def test_copy(self, dtype):
             array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    @@ -2064,7 +2119,7 @@ def test_copy(self, dtype):
             actual = variable.copy(data=other)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2105,45 +2160,43 @@ def test_no_conflicts(self, unit, dtype):
     
             assert expected == actual
     
    +    @pytest.mark.parametrize(
    +        "mode",
    +        [
    +            "constant",
    +            "mean",
    +            "median",
    +            "reflect",
    +            "edge",
    +            pytest.param(
    +                "linear_ramp",
    +                marks=pytest.mark.xfail(
    +                    reason="pint bug: https://github.com/hgrecco/pint/issues/1026"
    +                ),
    +            ),
    +            "maximum",
    +            "minimum",
    +            "symmetric",
    +            "wrap",
    +        ],
    +    )
         @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS)
    -    def test_pad_constant_values(self, dtype, xr_arg, np_arg):
    -        data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m
    +    def test_pad(self, mode, xr_arg, np_arg):
    +        data = np.arange(4 * 3 * 2).reshape(4, 3, 2) * unit_registry.m
             v = xr.Variable(["x", "y", "z"], data)
     
    -        actual = v.pad(**xr_arg, mode="constant")
    -        expected = xr.Variable(
    -            v.dims,
    -            np.pad(
    -                v.data.astype(float), np_arg, mode="constant", constant_values=np.nan,
    -            ),
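+        # padding is expected to keep the variable's units, whatever the mode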
    +        expected = attach_units(
    +            strip_units(v).pad(mode=mode, **xr_arg), extract_units(v),
             )
    -        xr.testing.assert_identical(expected, actual)
    -        assert_units_equal(expected, actual)
    -        assert isinstance(actual._data, type(v._data))
    +        actual = v.pad(mode=mode, **xr_arg)
     
    -        # for the boolean array, we pad False
    -        data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2)
    -        v = xr.Variable(["x", "y", "z"], data)
    -        actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0])
    -        expected = xr.Variable(
    -            v.dims,
    -            np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]),
    -        )
    -        xr.testing.assert_identical(actual, expected)
             assert_units_equal(expected, actual)
+        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
             (
    -            pytest.param(
    -                1,
    -                DimensionalityError,
    -                id="no_unit",
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) < LooseVersion("0.10.2"),
    -                    reason="bug in pint's implementation of np.pad",
    -                ),
    -            ),
    +            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
    @@ -2176,17 +2229,16 @@ def test_pad_unit_constant_value(self, unit, error, dtype):
             actual = func(variable, constant_values=fill_value)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
     
     class TestDataArray:
    -    @pytest.mark.filterwarnings("error:::pint[.*]")
         @pytest.mark.parametrize(
             "variant",
             (
                 pytest.param(
                     "with_dims",
    -                marks=pytest.mark.xfail(reason="units in indexes are not supported"),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
                 ),
                 "with_coords",
                 "without_coords",
    @@ -2215,7 +2267,6 @@ def test_init(self, variant, dtype):
                 }.values()
             )
     
    -    @pytest.mark.filterwarnings("error:::pint[.*]")
         @pytest.mark.parametrize(
             "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr"))
         )
    @@ -2224,7 +2275,7 @@ def test_init(self, variant, dtype):
             (
                 pytest.param(
                     "with_dims",
    -                marks=pytest.mark.xfail(reason="units in indexes are not supported"),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
                 ),
                 pytest.param("with_coords"),
                 pytest.param("without_coords"),
    @@ -2248,10 +2299,6 @@ def test_repr(self, func, variant, dtype):
             # warnings or errors, but does not check the result
             func(data_array)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose",
    -    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -2345,7 +2392,7 @@ def test_unary_operations(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2365,14 +2412,21 @@ def test_binary_operations(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "comparison",
             (
                 pytest.param(operator.lt, id="less_than"),
                 pytest.param(operator.ge, id="greater_equal"),
    -            pytest.param(operator.eq, id="equal"),
    +            pytest.param(
    +                operator.eq,
    +                id="equal",
    +                marks=pytest.mark.xfail(
+                    # TODO: restrict to LooseVersion(pint.__version__) < "0.14" once pint fixes eq
    +                    reason="inconsistencies in the return values of pint's eq",
    +                ),
    +            ),
             ),
         )
         @pytest.mark.parametrize(
    @@ -2416,7 +2470,7 @@ def test_comparison_operations(self, comparison, unit, error, dtype):
             )
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "units,error",
    @@ -2445,7 +2499,7 @@ def test_univariate_ufunc(self, units, error, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__")
         @pytest.mark.parametrize(
    @@ -2487,11 +2541,11 @@ def test_bivariate_ufunc(self, unit, error, dtype):
     
             actual = np.maximum(data_array, 1 * unit)
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
             actual = np.maximum(1 * unit, data_array)
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize("property", ("T", "imag", "real"))
         def test_numpy_properties(self, property, dtype):
    @@ -2508,7 +2562,7 @@ def test_numpy_properties(self, property, dtype):
             actual = getattr(data_array, property)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2524,7 +2578,7 @@ def test_numpy_methods(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         def test_item(self, dtype):
             array = np.arange(10).astype(dtype) * unit_registry.m
    @@ -2535,7 +2589,7 @@ def test_item(self, dtype):
             expected = func(strip_units(data_array)) * unit_registry.m
             actual = func(data_array)
     
    -        np.testing.assert_allclose(expected, actual)
    +        assert_duckarray_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2650,7 +2704,7 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             actual = func(data_array, *args, **kwargs)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -2673,7 +2727,7 @@ def test_missing_value_detection(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="ffill and bfill lose units in data")
         @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
    @@ -2691,7 +2745,7 @@ def test_missing_value_filling(self, func, dtype):
             actual = func(data_array, dim="x")
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2740,7 +2794,7 @@ def test_fillna(self, fill_value, unit, error, dtype):
             actual = func(data_array, value=value)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         def test_dropna(self, dtype):
             array = (
    @@ -2755,7 +2809,7 @@ def test_dropna(self, dtype):
             actual = data_array.dropna(dim="x")
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2784,7 +2838,7 @@ def test_isin(self, unit, dtype):
             actual = data_array.isin(values)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
    @@ -2838,7 +2892,7 @@ def test_where(self, variant, unit, error, dtype):
             actual = data_array.where(**kwargs)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="uses numpy.vectorize")
         def test_interpolate_na(self):
    @@ -2854,7 +2908,7 @@ def test_interpolate_na(self):
             actual = data_array.interpolate_na(dim="x")
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2895,7 +2949,7 @@ def test_combine_first(self, unit, error, dtype):
             actual = data_array.combine_first(other)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2984,17 +3038,47 @@ def is_compatible(a, b):
                 pytest.param(unit_registry.m, id="identical_unit"),
             ),
         )
    -    def test_broadcast_like(self, unit, dtype):
    -        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa
    -        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_broadcast_like(self, variant, unit, dtype):
    +        original_unit = unit_registry.m
    +
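+        # each entry pairs the units of arr1 and arr2 for the data, the
+        # dimension coordinates and the non-dimension coordinate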
    +        variants = {
    +            "data": ((original_unit, unit), (1, 1), (1, 1)),
    +            "dims": ((1, 1), (original_unit, unit), (1, 1)),
    +            "coords": ((1, 1), (1, 1), (original_unit, unit)),
    +        }
    +        (
    +            (data_unit1, data_unit2),
    +            (dim_unit1, dim_unit2),
    +            (coord_unit1, coord_unit2),
    +        ) = variants.get(variant)
    +
    +        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1
    +        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2
    +
    +        x1 = np.arange(2) * dim_unit1
    +        x2 = np.arange(2) * dim_unit2
    +        y1 = np.array([0]) * dim_unit1
    +        y2 = np.arange(3) * dim_unit2
     
    -        x1 = np.arange(2) * unit_registry.m
    -        x2 = np.arange(2) * unit
    -        y1 = np.array([0]) * unit_registry.m
    -        y2 = np.arange(3) * unit
    +        u1 = np.linspace(0, 1, 2) * coord_unit1
    +        u2 = np.linspace(0, 1, 2) * coord_unit2
     
    -        arr1 = xr.DataArray(data=array1, coords={"x": x1, "y": y1}, dims=("x", "y"))
    -        arr2 = xr.DataArray(data=array2, coords={"x": x2, "y": y2}, dims=("x", "y"))
    +        arr1 = xr.DataArray(
    +            data=array1, coords={"x": x1, "y": y1, "u": ("x", u1)}, dims=("x", "y")
    +        )
    +        arr2 = xr.DataArray(
    +            data=array2, coords={"x": x2, "y": y2, "u": ("x", u2)}, dims=("x", "y")
    +        )
     
             expected = attach_units(
                 strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1)
    @@ -3002,7 +3086,7 @@ def test_broadcast_like(self, unit, dtype):
             actual = arr1.broadcast_like(arr2)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -3032,56 +3116,89 @@ def test_broadcast_equals(self, unit, dtype):
     
             assert expected == actual
     
    +    def test_pad(self, dtype):
    +        array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    +
    +        data_array = xr.DataArray(data=array, dims="x")
    +        units = extract_units(data_array)
    +
    +        expected = attach_units(strip_units(data_array).pad(x=(2, 3)), units)
    +        actual = data_array.pad(x=(2, 3))
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
    +
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
         @pytest.mark.parametrize(
             "func",
             (
                 method("pipe", lambda da: da * 10),
    -            method("assign_coords", y2=("y", np.arange(10) * unit_registry.mm)),
    +            method("assign_coords", w=("y", np.arange(10) * unit_registry.mm)),
                 method("assign_attrs", attr1="value"),
    -            method("rename", x2="x_mm"),
    -            method("swap_dims", {"x": "x2"}),
    -            method(
    -                "expand_dims",
    -                dim={"z": np.linspace(10, 20, 12) * unit_registry.s},
    -                axis=1,
    +            method("rename", u="v"),
    +            pytest.param(
    +                method("swap_dims", {"x": "u"}),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +            pytest.param(
    +                method(
    +                    "expand_dims",
    +                    dim={"z": np.linspace(10, 20, 12) * unit_registry.s},
    +                    axis=1,
    +                ),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
                 ),
                 method("drop_vars", "x"),
    -            method("reset_coords", names="x2"),
    +            method("reset_coords", names="u"),
                 method("copy"),
                 method("astype", np.float32),
             ),
             ids=repr,
         )
    -    def test_content_manipulation(self, func, dtype):
    -        quantity = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
    -            * unit_registry.pascal
    -        )
    -        x = np.arange(quantity.shape[0]) * unit_registry.m
    -        y = np.arange(quantity.shape[1]) * unit_registry.m
    -        x2 = x.to(unit_registry.mm)
    +    def test_content_manipulation(self, func, variant, dtype):
    +        unit = unit_registry.m
    +
    +        variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
    +
    +        quantity = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
    +        x = np.arange(quantity.shape[0]) * dim_unit
    +        y = np.arange(quantity.shape[1]) * dim_unit
    +        u = np.linspace(0, 1, quantity.shape[0]) * coord_unit
     
             data_array = xr.DataArray(
    -            name="data",
    +            name="a",
                 data=quantity,
    -            coords={"x": x, "x2": ("x", x2), "y": y},
    +            coords={"x": x, "u": ("x", u), "y": y},
                 dims=("x", "y"),
             )
     
             stripped_kwargs = {
                 key: array_strip_units(value) for key, value in func.kwargs.items()
             }
    -        units = {**{"x_mm": x2.units, "x2": x2.units}, **extract_units(data_array)}
    +        units = extract_units(data_array)
    +        units["u"] = getattr(u, "units", None)
    +        units["v"] = getattr(u, "units", None)
     
             expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units)
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    @pytest.mark.parametrize(
    -        "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr
    -    )
         @pytest.mark.parametrize(
             "unit",
             (
    @@ -3090,22 +3207,20 @@ def test_content_manipulation(self, func, dtype):
                 pytest.param(unit_registry.degK, id="with_unit"),
             ),
         )
    -    def test_content_manipulation_with_units(self, func, unit, dtype):
    +    def test_copy(self, unit, dtype):
             quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal
    -        x = np.arange(len(quantity)) * unit_registry.m
    -
    -        data_array = xr.DataArray(data=quantity, coords={"x": x}, dims="x")
    +        new_data = np.arange(20)
     
    -        kwargs = {key: value * unit for key, value in func.kwargs.items()}
    +        data_array = xr.DataArray(data=quantity, dims="x")
     
             expected = attach_units(
    -            func(strip_units(data_array)), {None: unit, "x": x.units}
    +            strip_units(data_array).copy(data=new_data), {None: unit}
             )
     
    -        actual = func(data_array, **kwargs)
    +        actual = data_array.copy(data=new_data * unit)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "indices",
    @@ -3115,10 +3230,10 @@ def test_content_manipulation_with_units(self, func, unit, dtype):
             ),
         )
         def test_isel(self, indices, dtype):
    +        # TODO: maybe test for units in indexes?
             array = np.arange(10).astype(dtype) * unit_registry.s
    -        x = np.arange(len(array)) * unit_registry.m
     
    -        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
    +        data_array = xr.DataArray(data=array, dims="x")
     
             expected = attach_units(
                 strip_units(data_array).isel(x=indices), extract_units(data_array)
    @@ -3126,7 +3241,7 @@ def test_isel(self, indices, dtype):
             actual = data_array.isel(x=indices)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3171,7 +3286,7 @@ def test_sel(self, raw_values, unit, error, dtype):
             actual = data_array.sel(x=values)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3216,7 +3331,7 @@ def test_loc(self, raw_values, unit, error, dtype):
             actual = data_array.loc[{"x": values}]
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3261,8 +3376,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
             actual = data_array.drop_sel(x=values)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    +    @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all"))
         @pytest.mark.parametrize(
             "shape",
             (
    @@ -3273,36 +3389,22 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
                 pytest.param((1, 10, 1, 20), id="first_and_last_dimension_squeezable"),
             ),
         )
    -    def test_squeeze(self, shape, dtype):
    +    def test_squeeze(self, shape, dim, dtype):
    +        names = "xyzt"
    +        dim_lengths = dict(zip(names, shape))
             names = "xyzt"
    -        coords = {
    -            name: np.arange(length).astype(dtype)
    -            * (unit_registry.m if name != "t" else unit_registry.s)
    -            for name, length in zip(names, shape)
    -        }
             array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J
    -        data_array = xr.DataArray(
    -            data=array, coords=coords, dims=tuple(names[: len(shape)])
    -        )
    +        data_array = xr.DataArray(data=array, dims=tuple(names[: len(shape)]))
    +
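+        # pass "dim" only if it names an existing dimension of length 1;
+        # with empty kwargs, squeeze() drops every length-1 dimension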
    +        kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {}
     
             expected = attach_units(
    -            strip_units(data_array).squeeze(), extract_units(data_array)
    +            strip_units(data_array).squeeze(**kwargs), extract_units(data_array)
             )
    -        actual = data_array.squeeze()
    +        actual = data_array.squeeze(**kwargs)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    -
    -        # try squeezing the dimensions separately
    -        names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
    -        for index, name in enumerate(names):
    -            expected = attach_units(
    -                strip_units(data_array).squeeze(dim=name), extract_units(data_array)
    -            )
    -            actual = data_array.squeeze(dim=name)
    -
    -            assert_units_equal(expected, actual)
    -            xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3310,14 +3412,10 @@ def test_squeeze(self, shape, dtype):
             ids=repr,
         )
         def test_head_tail_thin(self, func, dtype):
    +        # TODO: works like isel. Maybe also test units in indexes?
             array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
     
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -        }
    -
    -        data_array = xr.DataArray(data=array, coords=coords, dims=("x", "y"))
    +        data_array = xr.DataArray(data=array, dims=("x", "y"))
     
             expected = attach_units(
                 func(strip_units(data_array)), extract_units(data_array)
    @@ -3325,12 +3423,8 @@ def test_head_tail_thin(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3361,7 +3455,7 @@ def test_interp_reindex(self, variant, func, dtype):
             actual = func(data_array, x=new_x)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3402,12 +3496,8 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype):
             actual = func(data_array, x=new_x)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    -    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3439,7 +3529,7 @@ def test_interp_reindex_like(self, variant, func, dtype):
             actual = func(data_array, other)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3482,7 +3572,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
             actual = func(data_array, other)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3505,7 +3595,7 @@ def test_stacking_stacked(self, func, dtype):
             actual = func(stacked)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         def test_to_unstacked_dataset(self, dtype):
    @@ -3529,7 +3619,7 @@ def test_to_unstacked_dataset(self, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3564,55 +3654,71 @@ def test_stacking_reordering(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
         @pytest.mark.parametrize(
             "func",
             (
                 method("diff", dim="x"),
                 method("differentiate", coord="x"),
                 method("integrate", dim="x"),
    -            pytest.param(
    -                method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="quantile / nanquantile not implemented yet",
    -                ),
    -            ),
    +            method("quantile", q=[0.25, 0.75]),
                 method("reduce", func=np.sum, dim="x"),
                 pytest.param(lambda x: x.dot(x), id="method_dot"),
             ),
             ids=repr,
         )
    -    def test_computation(self, func, dtype):
    -        array = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -        )
    +    def test_computation(self, func, variant, dtype):
    +        unit = unit_registry.m
     
    -        x = np.arange(array.shape[0]) * unit_registry.m
    -        y = np.arange(array.shape[1]) * unit_registry.s
    +        variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
     
    -        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
    +        array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
    +
    +        x = np.arange(array.shape[0]) * dim_unit
    +        y = np.arange(array.shape[1]) * dim_unit
    +
    +        u = np.linspace(0, 1, array.shape[0]) * coord_unit
    +
    +        data_array = xr.DataArray(
    +            data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y")
    +        )
     
             # we want to make sure the output unit is correct
    -        units = {
    -            **extract_units(data_array),
    -            **(
    -                {}
    -                if isinstance(func, (function, method))
    -                else extract_units(func(array.reshape(-1)))
    -            ),
    -        }
    +        units = extract_units(data_array)
    +        if not isinstance(func, (function, method)):
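+            # plain callables (like the dot lambda) can change the unit, so derive
+            # the expected output unit by applying them to the bare quantity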
    +            units.update(extract_units(func(array.reshape(-1))))
     
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    -    # TODO: remove once pint==0.12 has been released
    -    @pytest.mark.xfail(
    -        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
         )
         @pytest.mark.parametrize(
             "func",
    @@ -3632,25 +3738,37 @@ def test_computation(self, func, dtype):
                         reason="numbagg functions are not supported by pint"
                     ),
                 ),
    +            method("weighted", xr.DataArray(data=np.linspace(0, 1, 10), dims="y")),
             ),
             ids=repr,
         )
    -    def test_computation_objects(self, func, dtype):
    -        array = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -        )
    +    def test_computation_objects(self, func, variant, dtype):
    +        unit = unit_registry.m
     
    -        x = np.array([0, 0, 1, 2, 2]) * unit_registry.m
    -        y = np.arange(array.shape[1]) * 3 * unit_registry.s
    +        variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
    +
    +        array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
    +
    +        x = np.array([0, 0, 1, 2, 2]) * dim_unit
    +        y = np.arange(array.shape[1]) * 3 * dim_unit
     
    -        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
    +        u = np.linspace(0, 1, 5) * coord_unit
    +
    +        data_array = xr.DataArray(
    +            data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y")
    +        )
             units = extract_units(data_array)
     
             expected = attach_units(func(strip_units(data_array)).mean(), units)
             actual = func(data_array).mean()
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         def test_resample(self, dtype):
             array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    @@ -3665,33 +3783,47 @@ def test_resample(self, dtype):
             actual = func(data_array).mean()
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    -            method("assign_coords", z=(["x"], np.arange(5) * unit_registry.s)),
    +            method("assign_coords", z=("x", np.arange(5) * unit_registry.s)),
                 method("first"),
                 method("last"),
    -            pytest.param(
    -                method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="quantile / nanquantile not implemented yet",
    -                ),
    -            ),
    +            method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
             ),
             ids=repr,
         )
    -    def test_grouped_operations(self, func, dtype):
    -        array = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -        )
    +    def test_grouped_operations(self, func, variant, dtype):
    +        unit = unit_registry.m
     
    -        x = np.arange(array.shape[0]) * unit_registry.m
    -        y = np.arange(array.shape[1]) * 3 * unit_registry.s
    +        variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
    +        array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
    +
    +        x = np.arange(array.shape[0]) * dim_unit
    +        y = np.arange(array.shape[1]) * 3 * dim_unit
    +
    +        u = np.linspace(0, 1, array.shape[0]) * coord_unit
     
    -        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
    +        data_array = xr.DataArray(
    +            data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y")
    +        )
             units = {**extract_units(data_array), **{"z": unit_registry.s, "q": None}}
     
             stripped_kwargs = {
    @@ -3708,10 +3840,9 @@ def test_grouped_operations(self, func, dtype):
             actual = func(data_array.groupby("y"))
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_identical(expected, actual)
     
     
    -@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning")
     class TestDataset:
         @pytest.mark.parametrize(
             "unit,error",
    @@ -3796,8 +3927,7 @@ def test_init(self, shared, unit, error, dtype):
             (
                 "data",
                 pytest.param(
    -                "dims",
    -                marks=pytest.mark.xfail(reason="units in indexes are not supported"),
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
                 ),
                 "coords",
             ),
    @@ -4313,7 +4443,7 @@ def test_combine_first(self, variant, unit, error, dtype):
             (
                 "data",
                 pytest.param(
    -                "dims", marks=pytest.mark.xfail(reason="units in indexes not supported")
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
                 ),
                 "coords",
             ),
    @@ -4473,6 +4603,19 @@ def test_broadcast_equals(self, unit, dtype):
     
             assert expected == actual
     
    +    def test_pad(self, dtype):
    +        a = np.linspace(0, 5, 10).astype(dtype) * unit_registry.Pa
    +        b = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.degK
    +
    +        ds = xr.Dataset({"a": ("x", a), "b": ("x", b)})
    +        units = extract_units(ds)
    +
    +        expected = attach_units(strip_units(ds).pad(x=(2, 3)), units)
    +        actual = ds.pad(x=(2, 3))
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
    +
         @pytest.mark.parametrize(
             "func",
             (method("unstack"), method("reset_index", "v"), method("reorder_levels")),
    @@ -5011,13 +5154,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
                 method("diff", dim="x"),
                 method("differentiate", coord="x"),
                 method("integrate", coord="x"),
    -            pytest.param(
    -                method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="nanquantile not implemented yet",
    -                ),
    -            ),
    +            method("quantile", q=[0.25, 0.75]),
                 method("reduce", func=np.sum, dim="x"),
                 method("map", np.fabs),
             ),
    @@ -5067,13 +5204,7 @@ def test_computation(self, func, variant, dtype):
             "func",
             (
                 method("groupby", "x"),
    -            pytest.param(
    -                method("groupby_bins", "x", bins=2),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="needs assert_allclose but that does not work with pint",
    -                ),
    -            ),
    +            method("groupby_bins", "x", bins=2),
                 method("coarsen", x=2),
                 pytest.param(
                     method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units")
    @@ -5084,6 +5215,7 @@ def test_computation(self, func, variant, dtype):
                         reason="numbagg functions are not supported by pint"
                     ),
                 ),
    +            method("weighted", xr.DataArray(data=np.linspace(0, 1, 5), dims="y")),
             ),
             ids=repr,
         )
    @@ -5122,11 +5254,7 @@ def test_computation_objects(self, func, variant, dtype):
             actual = func(ds).mean(*args)
     
             assert_units_equal(expected, actual)
    -        # TODO: remove once pint 0.12 has been released
    -        if LooseVersion(pint.__version__) <= "0.12":
    -            assert_equal(expected, actual)
    -        else:
    -            assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "variant",
    @@ -5177,13 +5305,7 @@ def test_resample(self, variant, dtype):
                 method("assign_coords", v=("x", np.arange(5) * unit_registry.s)),
                 method("first"),
                 method("last"),
    -            pytest.param(
    -                method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(
    -                    LooseVersion(pint.__version__) <= "0.12",
    -                    reason="nanquantile not implemented",
    -                ),
    -            ),
    +            method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
             ),
             ids=repr,
         )
    
    From 03d409ec35034d78a3a625dcaf1744117587b93c Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Thu, 2 Jul 2020 13:39:00 -0700
    Subject: [PATCH 075/342] Improve the speed of from_dataframe with a MultiIndex
     (by 40x!) (#4184)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    * Add MultiIndexSeries.time_to_xarray() benchmark
    
    * Improve the speed of from_dataframe with a MultiIndex
    
    Fixes GH-2459
    
    Before:
    
        pandas.MultiIndexSeries.time_to_xarray
        ======= ========= ==========
        --             subset
        ------- --------------------
        dtype     True     False
        ======= ========= ==========
          int    505±0ms   37.1±0ms
         float   485±0ms   38.3±0ms
        ======= ========= ==========
    
    After:
    
        pandas.MultiIndexSeries.time_to_xarray
        ======= ========= ==========
        --             subset
        ------- --------------------
        dtype     True     False
        ======= ========= ==========
          int    11.5±0ms   39.2±0ms
         float   12.5±0ms   26.6±0ms
        ======= ========= ==========
    
    There are still some cases where we have to fall back to the existing
    slow implementation, but hopefully they should now be relatively rare.
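     
     The core of the speed-up is filling a full N-dimensional array directly
     through the MultiIndex integer codes instead of calling
     ``DataFrame.reindex()`` on the level product. A minimal, illustrative
     sketch of that trick (``fill_full_grid`` is a made-up helper name, not
     code from this patch):
     
         import numpy as np
         import pandas as pd
     
         def fill_full_grid(series: pd.Series) -> np.ndarray:
             # assumes a unique pd.MultiIndex; the full grid is the product of its levels
             idx = series.index
             shape = tuple(lev.size for lev in idx.levels)
             data = np.full(shape, np.nan)  # start from an all-missing grid
             # idx.codes holds, per level, the integer position of each row,
             # so tuple(idx.codes) is a valid NumPy fancy index
             data[tuple(idx.codes)] = np.asarray(series)
             return data
     
         index = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"])
         series = pd.Series(np.arange(6.0), index=index)[::2]  # keep every other row
         print(fill_full_grid(series))  # 2x3 grid, NaN where rows are missing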
    
    * remove unused import
    
    * Simplify converting MultiIndex dataframes
    
    * remove comments
    
    * remove types with NA
    
    * more multiindex dataframe tests
    
    * add whats new note
    
    * Preserve order of MultiIndex levels in from_dataframe
    
    * Add todo note
    
    * Rewrite from_dataframe to avoid passing around a dataframe
    
    * Require that MultiIndexes are unique even with sparse=True
    
    * clarify comment
    ---
     asv_bench/benchmarks/pandas.py | 24 ++++++++++++
     doc/whats-new.rst              | 10 +++--
     xarray/core/dataset.py         | 67 +++++++++++++++++++++++-----------
     xarray/core/indexes.py         | 13 ++++---
     xarray/tests/test_dataset.py   | 43 ++++++++++++++++++++++
     5 files changed, 127 insertions(+), 30 deletions(-)
     create mode 100644 asv_bench/benchmarks/pandas.py
    
    diff --git a/asv_bench/benchmarks/pandas.py b/asv_bench/benchmarks/pandas.py
    new file mode 100644
    index 00000000000..42ef18ac0c2
    --- /dev/null
    +++ b/asv_bench/benchmarks/pandas.py
    @@ -0,0 +1,24 @@
    +import numpy as np
    +import pandas as pd
    +
    +from . import parameterized
    +
    +
    +class MultiIndexSeries:
    +    def setup(self, dtype, subset):
    +        data = np.random.rand(100000).astype(dtype)
    +        index = pd.MultiIndex.from_product(
    +            [
    +                list("abcdefhijk"),
    +                list("abcdefhijk"),
    +                pd.date_range(start="2000-01-01", periods=1000, freq="B"),
    +            ]
    +        )
    +        series = pd.Series(data, index)
    +        if subset:
    +            series = series[::3]
    +        self.series = series
    +
    +    @parameterized(["dtype", "subset"], ([int, float], [True, False]))
    +    def time_to_xarray(self, dtype, subset):
    +        self.series.to_xarray()
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e4223f2b4e0..5dc39da5a06 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -49,7 +49,10 @@ Enhancements
       For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially
       rather than interpolating in multidimensional space. (:issue:`2223`)
       By `Keisuke Fujii `_.
    -- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
    +- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the
    +  dataframe has a MultiIndex (:pull:`4184`).
    +  By `Stephan Hoyer `_.
    +  - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
       coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
     
     New Features
    @@ -133,8 +136,9 @@ Bug fixes
       By `Deepak Cherian `_.
     - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`)
       By `Huite Bootsma `_.
    -- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`)
    -  By `Keisuke Fujii `_.
    +- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``.
    +  (:issue:`3951`, :issue:`4186`)
    +  By `Keisuke Fujii `_ and `Stephan Hoyer `_.
     - Fix renaming of coords when one or more stacked coords is not in
       sorted order during stack+groupby+apply operations. (:issue:`3287`,
       :pull:`3906`) By `Spencer Hill `_
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index b46b1d6dce0..5bfddaa710b 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -4543,11 +4543,10 @@ def to_dataframe(self):
             return self._to_dataframe(self.dims)
     
         def _set_sparse_data_from_dataframe(
    -        self, dataframe: pd.DataFrame, dims: tuple
    +        self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple
         ) -> None:
             from sparse import COO
     
    -        idx = dataframe.index
             if isinstance(idx, pd.MultiIndex):
                 coords = np.stack([np.asarray(code) for code in idx.codes], axis=0)
                 is_sorted = idx.is_lexsorted()
    @@ -4557,11 +4556,7 @@ def _set_sparse_data_from_dataframe(
                 is_sorted = True
                 shape = (idx.size,)
     
    -        for name, series in dataframe.items():
    -            # Cast to a NumPy array first, in case the Series is a pandas
    -            # Extension array (which doesn't have a valid NumPy dtype)
    -            values = np.asarray(series)
    -
    +        for name, values in arrays:
                 # In virtually all real use cases, the sparse array will now have
                 # missing values and needs a fill_value. For consistency, don't
                 # special case the rare exceptions (e.g., dtype=int without a
    @@ -4580,18 +4575,36 @@ def _set_sparse_data_from_dataframe(
                 self[name] = (dims, data)
     
         def _set_numpy_data_from_dataframe(
    -        self, dataframe: pd.DataFrame, dims: tuple
    +        self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple
         ) -> None:
    -        idx = dataframe.index
    -        if isinstance(idx, pd.MultiIndex):
    -            # expand the DataFrame to include the product of all levels
    -            full_idx = pd.MultiIndex.from_product(idx.levels, names=idx.names)
    -            dataframe = dataframe.reindex(full_idx)
    -            shape = tuple(lev.size for lev in idx.levels)
    -        else:
    -            shape = (idx.size,)
    -        for name, series in dataframe.items():
    -            data = np.asarray(series).reshape(shape)
    +        if not isinstance(idx, pd.MultiIndex):
    +            for name, values in arrays:
    +                self[name] = (dims, values)
    +            return
    +
    +        shape = tuple(lev.size for lev in idx.levels)
    +        indexer = tuple(idx.codes)
    +
    +        # We already verified that the MultiIndex has all unique values, so
    +        # there are missing values if and only if the size of output arrays is
     +        # larger than the index.
    +        missing_values = np.prod(shape) > idx.shape[0]
    +
    +        for name, values in arrays:
    +            # NumPy indexing is much faster than using DataFrame.reindex() to
    +            # fill in missing values:
    +            # https://stackoverflow.com/a/35049899/809705
    +            if missing_values:
    +                dtype, fill_value = dtypes.maybe_promote(values.dtype)
    +                data = np.full(shape, fill_value, dtype)
    +            else:
    +                # If there are no missing values, keep the existing dtype
    +                # instead of promoting to support NA, e.g., keep integer
    +                # columns as integers.
    +                # TODO: consider removing this special case, which doesn't
    +                # exist for sparse=True.
    +                data = np.zeros(shape, values.dtype)
    +            data[indexer] = values
                 self[name] = (dims, data)
     
         @classmethod
    @@ -4631,7 +4644,19 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas
             if not dataframe.columns.is_unique:
                 raise ValueError("cannot convert DataFrame with non-unique columns")
     
    -        idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe)
    +        idx = remove_unused_levels_categories(dataframe.index)
    +
    +        if isinstance(idx, pd.MultiIndex) and not idx.is_unique:
    +            raise ValueError(
    +                "cannot convert a DataFrame with a non-unique MultiIndex into xarray"
    +            )
    +
    +        # Cast to a NumPy array first, in case the Series is a pandas Extension
    +        # array (which doesn't have a valid NumPy dtype)
    +        # TODO: allow users to control how this casting happens, e.g., by
    +        # forwarding arguments to pandas.Series.to_numpy?
    +        arrays = [(k, np.asarray(v)) for k, v in dataframe.items()]
    +
             obj = cls()
     
             if isinstance(idx, pd.MultiIndex):
    @@ -4647,9 +4672,9 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas
                 obj[index_name] = (dims, idx)
     
             if sparse:
    -            obj._set_sparse_data_from_dataframe(dataframe, dims)
    +            obj._set_sparse_data_from_dataframe(idx, arrays, dims)
             else:
    -            obj._set_numpy_data_from_dataframe(dataframe, dims)
    +            obj._set_numpy_data_from_dataframe(idx, arrays, dims)
             return obj
     
         def to_dask_dataframe(self, dim_order=None, set_index=False):
    diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
    index a4a5fa2c466..6b7220fdfd4 100644
    --- a/xarray/core/indexes.py
    +++ b/xarray/core/indexes.py
    @@ -9,7 +9,7 @@
     from .variable import Variable
     
     
    -def remove_unused_levels_categories(index, dataframe=None):
    +def remove_unused_levels_categories(index: pd.Index) -> pd.Index:
         """
         Remove unused levels from MultiIndex and unused categories from CategoricalIndex
         """
    @@ -25,14 +25,15 @@ def remove_unused_levels_categories(index, dataframe=None):
                     else:
                         level = level[index.codes[i]]
                     levels.append(level)
    +            # TODO: calling from_array() reorders MultiIndex levels. It would
    +            # be best to avoid this, if possible, e.g., by using
    +            # MultiIndex.remove_unused_levels() (which does not reorder) on the
    +            # part of the MultiIndex that is not categorical, or by fixing this
    +            # upstream in pandas.
                 index = pd.MultiIndex.from_arrays(levels, names=index.names)
         elif isinstance(index, pd.CategoricalIndex):
             index = index.remove_unused_categories()
    -
    -    if dataframe is None:
    -        return index
    -    dataframe = dataframe.set_index(index)
    -    return dataframe.index, dataframe
    +    return index
     
     
     class Indexes(collections.abc.Mapping):
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 0c4082a553e..62e2dd5c4f2 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -4013,6 +4013,49 @@ def test_to_and_from_empty_dataframe(self):
             assert len(actual) == 0
             assert expected.equals(actual)
     
    +    def test_from_dataframe_multiindex(self):
    +        index = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"])
    +        df = pd.DataFrame({"z": np.arange(6)}, index=index)
    +
    +        expected = Dataset(
    +            {"z": (("x", "y"), [[0, 1, 2], [3, 4, 5]])},
    +            coords={"x": ["a", "b"], "y": [1, 2, 3]},
    +        )
    +        actual = Dataset.from_dataframe(df)
    +        assert_identical(actual, expected)
    +
    +        df2 = df.iloc[[3, 2, 1, 0, 4, 5], :]
    +        actual = Dataset.from_dataframe(df2)
    +        assert_identical(actual, expected)
    +
    +        df3 = df.iloc[:4, :]
    +        expected3 = Dataset(
    +            {"z": (("x", "y"), [[0, 1, 2], [3, np.nan, np.nan]])},
    +            coords={"x": ["a", "b"], "y": [1, 2, 3]},
    +        )
    +        actual = Dataset.from_dataframe(df3)
    +        assert_identical(actual, expected3)
    +
    +        df_nonunique = df.iloc[[0, 0], :]
    +        with raises_regex(ValueError, "non-unique MultiIndex"):
    +            Dataset.from_dataframe(df_nonunique)
    +
    +    def test_from_dataframe_unsorted_levels(self):
    +        # regression test for GH-4186
    +        index = pd.MultiIndex(
    +            levels=[["b", "a"], ["foo"]], codes=[[0, 1], [0, 0]], names=["lev1", "lev2"]
    +        )
    +        df = pd.DataFrame({"c1": [0, 2], "c2": [1, 3]}, index=index)
    +        expected = Dataset(
    +            {
    +                "c1": (("lev1", "lev2"), [[0], [2]]),
    +                "c2": (("lev1", "lev2"), [[1], [3]]),
    +            },
    +            coords={"lev1": ["b", "a"], "lev2": ["foo"]},
    +        )
    +        actual = Dataset.from_dataframe(df)
    +        assert_identical(actual, expected)
    +
         def test_from_dataframe_non_unique_columns(self):
             # regression test for GH449
             df = pd.DataFrame(np.zeros((2, 2)))
    
    From 329cefb99220042a586c2b5fdbea679fd8a89cd5 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Thu, 2 Jul 2020 20:51:10 +0000
    Subject: [PATCH 076/342] Fix to_unstacked_dataset for single dimension
     variables. (#4094)
    
    ---
     doc/whats-new.rst            | 2 ++
     xarray/core/dataarray.py     | 2 +-
     xarray/tests/test_dataset.py | 8 ++++++++
     3 files changed, 11 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 5dc39da5a06..c7a2b85561b 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -171,6 +171,8 @@ Bug fixes
       By `Mathias Hauser `_.
     - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`)
       By `Benoit Bovy `_.
    +- Fix :py:meth:`DataArray.to_unstacked_dataset` for single-dimension variables. (:issue:`4049`)
    +  By `Deepak Cherian `_
     - Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`)
       By `Dave Cole `_.
     
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 0ce76a5e23a..dbc4877fa1d 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -1961,7 +1961,7 @@ def to_unstacked_dataset(self, dim, level=0):
             # pull variables out of datarray
             data_dict = {}
             for k in variables:
    -            data_dict[k] = self.sel({variable_dim: k}).squeeze(drop=True)
    +            data_dict[k] = self.sel({variable_dim: k}, drop=True).squeeze(drop=True)
     
             # unstacked dataset
             return Dataset(data_dict)
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 62e2dd5c4f2..9037013cc79 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -3031,6 +3031,14 @@ def test_to_stacked_array_dtype_dims(self):
             assert y.dims == ("x", "features")
     
         def test_to_stacked_array_to_unstacked_dataset(self):
    +
    +        # single dimension: regression test for GH4049
    +        arr = xr.DataArray(np.arange(3), coords=[("x", [0, 1, 2])])
    +        data = xr.Dataset({"a": arr, "b": arr})
    +        stacked = data.to_stacked_array("y", sample_dims=["x"])
    +        unstacked = stacked.to_unstacked_dataset("y")
    +        assert_identical(unstacked, data)
    +
             # make a two dimensional dataset
             a, b = create_test_stacked_array()
             D = xr.Dataset({"a": a, "b": b})
    
    From 834d4c461e523d2a1873617b027d3e20cf255bd2 Mon Sep 17 00:00:00 2001
    From: raphael dussin 
    Date: Thu, 2 Jul 2020 16:51:55 -0400
    Subject: [PATCH 077/342] Allow passing axis kwargs to plot (#4020)
    
    * fix facecolor plot
    
    * temp version
    
    * finish fix facecolor + solves #3169
    
    * black formatting
    
    * add testing
    
    * allow cartopy projection to be a kwarg
    
    * fix PEP8 comment
    
    * black formatting
    
    * fix testing, plt not in parameterize
    
    * fix testing, allows for no matplotlib
    
     * black formatting
    
    * fix tests without matplotlib
    
    * fix some mistakes
    
    * isort, mypy
    
    * fix mypy
    
    * remove empty line
    
    * correction from review
    
    * correction from 2nd review
    
    * updated tests
    
    * updated tests
    
    * black formatting
    
    * follow up correction from review
    
    * fix tests
    
    * fix tests again
    
    * fix bug in tests
    
    * fix pb in tests
    
    * remove useless line
    
    * clean up tests
    
    * fix
    
    * Add whats-new
    
    Co-authored-by: dcherian 
    ---
     doc/plotting.rst          |  9 +++++----
     doc/whats-new.rst         |  3 +++
     xarray/plot/plot.py       | 15 +++++++++------
     xarray/plot/utils.py      | 14 ++++++++++----
     xarray/tests/__init__.py  |  1 +
     xarray/tests/test_plot.py | 40 +++++++++++++++++++++++++++++++++++++++
     6 files changed, 68 insertions(+), 14 deletions(-)
    
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index 72248e31b1e..02ddba1e00c 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -743,12 +743,13 @@ This script will plot the air temperature on a map.
     
         air = xr.tutorial.open_dataset("air_temperature").air
     
    -    ax = plt.axes(projection=ccrs.Orthographic(-80, 35))
    -    air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree())
    -    ax.set_global()
    +    p = air.isel(time=0).plot(
    +        subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"),
    +        transform=ccrs.PlateCarree())
    +    p.axes.set_global()
     
         @savefig plotting_maps_cartopy.png width=100%
    -    ax.coastlines()
    +    p.axes.coastlines()
     
     When faceting on maps, the projection can be transferred to the ``plot``
     function using the ``subplot_kws`` keyword. The axes for the subplots created
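     
     The same ``subplot_kws`` forwarding now also applies to plain single-axes
     plots, which is what the whats-new entry below describes. A minimal sketch
     with made-up data (no cartopy required; ``facecolor`` is just one example
     of an axes keyword):
     
         import numpy as np
         import xarray as xr
     
         da = xr.DataArray(np.random.rand(4, 5), dims=("lat", "lon"))
         # subplot_kws is forwarded to the axes-creation call, so axes-level
         # keywords such as facecolor take effect even without a FacetGrid
         da.plot(subplot_kws=dict(facecolor="lightgray"))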
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index c7a2b85561b..43b248670f5 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -54,6 +54,9 @@ Enhancements
       By `Stephan Hoyer `_.
       - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
       coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
    +- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``.
    +  This works for both single axes plots and FacetGrid plots.
     +  By `Raphael Dussin `_.
    Date: Fri, 3 Jul 2020 02:51:32 +0000
    Subject: [PATCH 078/342] Bump minimum versions for 0.16 release (#4175)
    
    * Bump minimum versions for 0.16 release
    
    * Undo scipy bump
    
    * fix bumps
    ---
     ci/requirements/py36-min-all-deps.yml | 7 +++----
     ci/requirements/py36-min-nep18.yml    | 5 ++---
     doc/whats-new.rst                     | 3 +++
     3 files changed, 8 insertions(+), 7 deletions(-)
    
    diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
    index a72cd000680..c11c52bd19f 100644
    --- a/ci/requirements/py36-min-all-deps.yml
    +++ b/ci/requirements/py36-min-all-deps.yml
    @@ -15,8 +15,8 @@ dependencies:
       - cfgrib=0.9
       - cftime=1.0
       - coveralls
    -  - dask=2.5
    -  - distributed=2.5
    +  - dask=2.9
    +  - distributed=2.9
       - flake8
       - h5netcdf=0.7
       - h5py=2.9  # Policy allows for 2.10, but it's a conflict-fest
    @@ -26,11 +26,10 @@ dependencies:
       - isort
       - lxml=4.4  # Optional dep of pydap
       - matplotlib=3.1
    -  - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - mypy=0.761  # Must match .pre-commit-config.yaml
       - nc-time-axis=1.2
       - netcdf4=1.4
    -  - numba=0.44
    +  - numba=0.46
       - numpy=1.15
       - pandas=0.25
       # - pint  # See py36-min-nep18.yml
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index dd543ce4ddf..a9f12abfeae 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -6,9 +6,8 @@ dependencies:
       # require drastically newer packages than everything else
       - python=3.6
       - coveralls
    -  - dask=2.5
    -  - distributed=2.5
    -  - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
    +  - dask=2.9
    +  - distributed=2.9
       - numpy=1.17
       - pandas=0.25
       - pint=0.13
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 43b248670f5..378dfb30f25 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -22,6 +22,9 @@ v0.16.0 (unreleased)
     Breaking changes
     ~~~~~~~~~~~~~~~~
     
    +- Minimum supported versions for the following packages have changed: ``dask >=2.9``,
    +  ``distributed>=2.9``.
    +  By `Deepak Cherian `_
     - ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False``
       to revert to previous behavior.
     - :meth:`DataArray.transpose` will now transpose coordinates by default.
    
    From 03c8562bda56cbd90e571a5beb41f44fba064813 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 4 Jul 2020 19:24:14 +0200
    Subject: [PATCH 079/342] get the colorbar label via public methods (#4201)
    
    ---
     xarray/tests/test_plot.py | 9 ++++++++-
     1 file changed, 8 insertions(+), 1 deletion(-)
    
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index 610730e9eb2..788c26f3b39 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -88,6 +88,13 @@ def easy_array(shape, start=0, stop=1):
         return a.reshape(shape)
     
     
    +def get_colorbar_label(colorbar):
    +    if colorbar.orientation == "vertical":
    +        return colorbar.ax.get_ylabel()
    +    else:
    +        return colorbar.ax.get_xlabel()
    +
    +
     @requires_matplotlib
     class PlotTestCase:
         @pytest.fixture(autouse=True)
    @@ -1414,7 +1421,7 @@ def test_facetgrid_cbar_kwargs(self):
     
             # catch contour case
             if hasattr(g, "cbar"):
    -            assert g.cbar._label == "test_label"
    +            assert get_colorbar_label(g.cbar) == "test_label"
     
         def test_facetgrid_no_cbar_ax(self):
             a = easy_array((10, 15, 2, 3))
    
    From 64c13918492a4b9ef5431ca7461e039a4bd69c95 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Tue, 7 Jul 2020 16:50:29 +0200
    Subject: [PATCH 080/342] pin isort (#4206)
    
    * pin isort for now
    
    * also pin isort for all other CI
    
    * pin isort to the exact version
    ---
     ci/requirements/py36-min-all-deps.yml | 2 +-
     ci/requirements/py36.yml              | 2 +-
     ci/requirements/py37-windows.yml      | 2 +-
     ci/requirements/py37.yml              | 2 +-
     ci/requirements/py38-all-but-dask.yml | 2 +-
     ci/requirements/py38.yml              | 2 +-
     6 files changed, 6 insertions(+), 6 deletions(-)
    
    diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
    index c11c52bd19f..b14582ca9c2 100644
    --- a/ci/requirements/py36-min-all-deps.yml
    +++ b/ci/requirements/py36-min-all-deps.yml
    @@ -23,7 +23,7 @@ dependencies:
       - hdf5=1.10
       - hypothesis
       - iris=2.2
    -  - isort
    +  - isort=4.3.21
       - lxml=4.4  # Optional dep of pydap
       - matplotlib=3.1
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
    index a500173f277..9ff2c6c49ca 100644
    --- a/ci/requirements/py36.yml
    +++ b/ci/requirements/py36.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort
    +  - isort=4.3.21
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
    index e9e5c7a900a..19285a35eca 100644
    --- a/ci/requirements/py37-windows.yml
    +++ b/ci/requirements/py37-windows.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort
    +  - isort=4.3.21
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
    index dba3926596e..3fcb4efd009 100644
    --- a/ci/requirements/py37.yml
    +++ b/ci/requirements/py37.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort
    +  - isort=4.3.21
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
    index a375d9e1e5a..4e6f0dd5387 100644
    --- a/ci/requirements/py38-all-but-dask.yml
    +++ b/ci/requirements/py38-all-but-dask.yml
    @@ -16,7 +16,7 @@ dependencies:
       - h5py
       - hdf5
       - hypothesis
    -  - isort
    +  - isort=4.3.21
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 7dff3a1bd97..4598fcd2790 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort
    +  - isort=4.3.21
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.780  # Must match .pre-commit-config.yaml
    
    From f3ca63a4ac5c091a92085b477a0d34c08df88aa6 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Tue, 7 Jul 2020 16:52:26 +0200
    Subject: [PATCH 081/342] fix sphinx warnings (#4199)
    
    * fix a link
    
    * remove the earlier attempts to document .str and .dt
    
    * fail warnings on RTD
    
    * disable fail_on_warning again
    ---
     doc/api.rst       | 2 --
     doc/whats-new.rst | 2 +-
     readthedocs.yml   | 3 +++
     3 files changed, 4 insertions(+), 3 deletions(-)
    
    diff --git a/doc/api.rst b/doc/api.rst
    index 603e3e8f6cf..72a6dd4d97a 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -360,7 +360,6 @@ Computation
        DataArray.rolling_exp
        DataArray.weighted
        DataArray.coarsen
    -   DataArray.dt
        DataArray.resample
        DataArray.get_axis_num
        DataArray.diff
    @@ -369,7 +368,6 @@ Computation
        DataArray.differentiate
        DataArray.integrate
        DataArray.polyfit
    -   DataArray.str
        DataArray.map_blocks
     
     
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 378dfb30f25..b33f817e9e1 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -59,7 +59,7 @@ Enhancements
       coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
     - Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``.
       This works for both single axes plots and FacetGrid plots.
    -  By `Raphael Dussin `_.
     
     New Features
     ~~~~~~~~~~~~
    diff --git a/readthedocs.yml b/readthedocs.yml
    index 88aee82a44b..88abb57ae43 100644
    --- a/readthedocs.yml
    +++ b/readthedocs.yml
    @@ -6,4 +6,7 @@ build:
     conda:
         environment: ci/requirements/doc.yml
     
    +sphinx:
    +  fail_on_warning: false
    +
     formats: []
    
    From c90d6dc1f68bcc857c5e0a19c8da75e68b76673f Mon Sep 17 00:00:00 2001
    From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    Date: Sat, 11 Jul 2020 16:33:18 -0400
    Subject: [PATCH 082/342] Minor reorg of whatsnew for 0.16.0 (#4216)
    
    ---
     doc/whats-new.rst | 75 ++++++++++++++++++++++++++++-------------------
     1 file changed, 45 insertions(+), 30 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index b33f817e9e1..eda89f8c0c1 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -16,9 +16,21 @@ What's New
     
     .. _whats-new.0.16.0:
     
    -v0.16.0 (unreleased)
    +v0.16.0 (2020-07-11)
     ---------------------
     
    +Thank you to all contributors who built this release:
    +
    +Akio Taniguchi, Andrew Williams, Aurélien Ponte, Benoit Bovy, Dave Cole, David
    +Brochart, Deepak Cherian, Elliott Sales de Andrade, Etienne Combrisson, Hossein
    +Madadi, Huite, Joe Hamman, Kai Mühlbauer, Keisuke Fujii, Maik Riechert, Marek
    +Jacob, Mathias Hauser, Matthieu Ancellin, Maximilian Roos, Noah D Brenowitz,
    +Oriol Abril, Pascal Bourgault, Phillip Butcher, Prajjwal Nijhara, Ray Bell, Ryan
    +Abernathey, Ryan May, Spencer Clark, Spencer Hill, Srijan Saurav, Stephan Hoyer,
    +Taher Chegini, Todd, Tom Nicholas, Yohai Bar Sinai, Yunus Sevinchan,
    +arabidopsis, aurghs, clausmichele, dmey, johnomotani, keewis, raphael dussin,
    +risebell
    +
     Breaking changes
     ~~~~~~~~~~~~~~~~
     
    @@ -46,21 +58,6 @@ Breaking changes
       default (:issue:`4176`)
       By `Stephan Hoyer `_.
     
    -Enhancements
    -~~~~~~~~~~~~
    -- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp`
    -  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially
    -  rather than interpolating in multidimensional space. (:issue:`2223`)
    -  By `Keisuke Fujii `_.
    -- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the
    -  dataframe has a MultiIndex (:pull:`4184`).
    -  By `Stephan Hoyer `_.
    -  - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
    -  coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
    -- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``.
    -  This works for both single axes plots and FacetGrid plots.
    -  By `Raphael Dussin `_.
    -
     New Features
     ~~~~~~~~~~~~
     - :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support
    @@ -70,15 +67,19 @@ New Features
       (:pull:`3936`)
       By `John Omotani `_, thanks to `Keisuke Fujii
       `_ for work in :pull:`1469`.
    +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
    +  By `Andrew Williams `_ and `Robin Beer `_.
    +- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
    +  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
    +  By `Todd Jennings `_
    +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting
    +  polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
    +  By `Pascal Bourgault `_.
     - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
       By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
       :py:meth:`Dataset.chunk`. (:issue:`4055`)
       By `Andrew Williams `_
    -- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
    -  By `Andrew Williams `_ and `Robin Beer `_.
    -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
    -  By `Pascal Bourgault `_.
     - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
       :py:func:`combine_by_coords` and :py:func:`combine_nested` using
       combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
    @@ -88,15 +89,6 @@ New Features
       the exception when a dimension passed to ``isel`` is not present with a
       warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`)
       By `John Omotani `_
    -- Limited the length of array items with long string reprs to a
    -  reasonable width (:pull:`3900`)
    -  By `Maximilian Roos `_
    -- Limited the number of lines of large arrays when numpy reprs would have greater than 40.
    -  (:pull:`3905`)
    -  By `Maximilian Roos `_
    -- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
    -  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
    -  By `Todd Jennings `_
     - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`, :pull:`4135`)
       By `Kai Mühlbauer `_ and `Pascal Bourgault `_.
    @@ -128,7 +120,30 @@ New Features
       (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`,
       :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas
       independently of time decoding (:issue:`1621`)
    -  `Aureliana Barghini `
    +  `Aureliana Barghini `_
    +
    +Enhancements
    +~~~~~~~~~~~~
    +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp`
    +  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially
    +  rather than interpolating in multidimensional space. (:issue:`2223`)
    +  By `Keisuke Fujii `_.
    +- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the
    +  dataframe has a MultiIndex (:pull:`4184`).
    +  By `Stephan Hoyer `_.
    +  - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
    +  coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
    +- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``.
    +  This works for both single axes plots and FacetGrid plots.
    +  By `Raphael Dussin `_.
    +- Array items with long string reprs are now limited to a
    +  reasonable width (:pull:`3900`)
    +  By `Maximilian Roos `_
    +- Large arrays whose numpy reprs would have greater than 40 lines are now
    +  limited to a reasonable length.
    +  (:pull:`3905`)
    +  By `Maximilian Roos `_
    +
     
     Bug fixes
     ~~~~~~~~~
    
    From 567692634a56a13076a3ad39a46927a613d9e13f Mon Sep 17 00:00:00 2001
    From: Maximilian Roos 
    Date: Sat, 11 Jul 2020 13:35:44 -0700
    Subject: [PATCH 083/342] Release v0.16.0
    
    
    From c8d452804dd0058ce5d3429215b4152ac3322a37 Mon Sep 17 00:00:00 2001
    From: Maximilian Roos 
    Date: Sat, 11 Jul 2020 13:41:19 -0700
    Subject: [PATCH 084/342] New whatsnew section
    
    ---
     doc/whats-new.rst | 25 +++++++++++++++++++++++++
     1 file changed, 25 insertions(+)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index eda89f8c0c1..c4f6108629c 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -14,6 +14,31 @@ What's New
     
         np.random.seed(123456)
     
    +.. _whats-new.0.16.1:
    +
    +v0.16.1 (unreleased)
    +---------------------
    +
    +Breaking changes
    +~~~~~~~~~~~~~~~~
    +
    +
    +New Features
    +~~~~~~~~~~~~
    +
    +
    +Bug fixes
    +~~~~~~~~~
    +
    +
    +Documentation
    +~~~~~~~~~~~~~
    +
    +
    +Internal Changes
    +~~~~~~~~~~~~~~~~
    +
    +
     .. _whats-new.0.16.0:
     
     v0.16.0 (2020-07-11)
    
    From 7bf9df9d75c40bcbf2dd28c47204529a76561a3f Mon Sep 17 00:00:00 2001
    From: Maximilian Roos 
    Date: Sat, 11 Jul 2020 14:17:02 -0700
    Subject: [PATCH 085/342] Add 0.16.0 release summary
    
    ---
     doc/whats-new.rst | 6 +++++-
     1 file changed, 5 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index c4f6108629c..d086d4f411d 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -44,7 +44,11 @@ Internal Changes
     v0.16.0 (2020-07-11)
     ---------------------
     
    -Thank you to all contributors who built this release:
    +This release adds `xarray.cov` & `xarray.corr` for covariance & correlation
    +respectively; the `idxmax` & `idxmin` methods, the `polyfit` method &
    +`xarray.polyval` for fitting polynomials, as well as a number of documentation
    +improvements, other features, and bug fixes. Many thanks to all 44 contributors
    +who contributed to this release:
     
     Akio Taniguchi, Andrew Williams, Aurélien Ponte, Benoit Bovy, Dave Cole, David
     Brochart, Deepak Cherian, Elliott Sales de Andrade, Etienne Combrisson, Hossein
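     
     As a quick, hedged illustration of a few of the APIs named in this release
     summary (made-up data; consult the API docs for full signatures):
     
         import numpy as np
         import xarray as xr
     
         da = xr.DataArray(
             [[3.0, 1.0, 2.0], [0.5, 2.5, 1.5]],
             coords={"x": [10, 20], "y": [0.0, 1.0, 2.0]},
             dims=("x", "y"),
         )
     
         print(da.idxmax(dim="y"))            # label of the maximum along "y"
         print(xr.corr(da, da * 2, dim="y"))  # correlation along "y", exactly 1.0 here
     
         fit = da.polyfit(dim="y", deg=1)     # least-squares line for each "x"
         print(xr.polyval(da["y"], fit.polyfit_coefficients))  # evaluate the fit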
    
    From 52043bc57f20438e8923790bca90b646c82442ad Mon Sep 17 00:00:00 2001
    From: Jacob Tomlinson 
    Date: Mon, 13 Jul 2020 16:07:43 +0100
    Subject: [PATCH 086/342] Add initial cupy tests (#4214)
    
    * Add initial cupy tests
    
    * Linting
    
    * Docstrings
    ---
     xarray/tests/test_cupy.py | 50 +++++++++++++++++++++++++++++++++++++++
     1 file changed, 50 insertions(+)
     create mode 100644 xarray/tests/test_cupy.py
    
    diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py
    new file mode 100644
    index 00000000000..624e78d9271
    --- /dev/null
    +++ b/xarray/tests/test_cupy.py
    @@ -0,0 +1,50 @@
    +import numpy as np
    +import pandas as pd
    +import pytest
    +
    +import xarray as xr
    +
    +cp = pytest.importorskip("cupy")
    +
    +
    +@pytest.fixture
    +def toy_weather_data():
    +    """Construct the example DataSet from the Toy weather data example.
    +
    +    http://xarray.pydata.org/en/stable/examples/weather-data.html
    +
    +    Here we construct the DataSet exactly as shown in the example and then
    +    convert the numpy arrays to cupy.
    +
    +    """
    +    np.random.seed(123)
    +    times = pd.date_range("2000-01-01", "2001-12-31", name="time")
    +    annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))
    +
    +    base = 10 + 15 * annual_cycle.reshape(-1, 1)
    +    tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)
    +    tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3)
    +
    +    ds = xr.Dataset(
    +        {
    +            "tmin": (("time", "location"), tmin_values),
    +            "tmax": (("time", "location"), tmax_values),
    +        },
    +        {"time": times, "location": ["IA", "IN", "IL"]},
    +    )
    +
    +    ds.tmax.data = cp.asarray(ds.tmax.data)
    +    ds.tmin.data = cp.asarray(ds.tmin.data)
    +
    +    return ds
    +
    +
    +def test_cupy_import():
    +    """Check the import worked."""
    +    assert cp
    +
    +
    +def test_check_data_stays_on_gpu(toy_weather_data):
    +    """Perform some operations and check the data stays on the GPU."""
    +    freeze = (toy_weather_data["tmin"] <= 0).groupby("time.month").mean("time")
    +    assert isinstance(freeze.data, cp.core.core.ndarray)
    
    From 1be777fe725a85b8cc0f65a2bc41f4bc2ba18043 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Thu, 16 Jul 2020 21:13:57 +0200
    Subject: [PATCH 087/342] update isort CI and pre-commit hook (#4204)
    
    * don't use the removed isort flag
    
    * update the isort hook
    
    * remove the recursive flag from the pull request template
    
    * remove the recursive flag from the contributing guide
    
    * unpin isort
    
    * update the isort pre-commit hook
    
    * apply isort which now also formats imports in functions
    
    * update the flake8 pre-commit hook to get mypy overloads to pass
    ---
     .github/PULL_REQUEST_TEMPLATE.md      |  2 +-
     .pre-commit-config.yaml               |  5 ++---
     azure-pipelines.yml                   |  2 +-
     ci/requirements/py36-min-all-deps.yml |  2 +-
     ci/requirements/py36.yml              |  2 +-
     ci/requirements/py37-windows.yml      |  2 +-
     ci/requirements/py37.yml              |  2 +-
     ci/requirements/py38-all-but-dask.yml |  2 +-
     ci/requirements/py38.yml              |  2 +-
     conftest.py                           |  1 +
     doc/contributing.rst                  |  2 +-
     xarray/backends/scipy_.py             |  3 ++-
     xarray/conventions.py                 |  2 +-
     xarray/convert.py                     |  1 +
     xarray/core/common.py                 |  4 ++--
     xarray/core/computation.py            |  2 +-
     xarray/core/concat.py                 |  2 +-
     xarray/core/dataset.py                |  6 +++---
     xarray/core/groupby.py                |  2 +-
     xarray/core/indexing.py               |  2 +-
     xarray/core/nanops.py                 |  3 ++-
     xarray/core/parallel.py               |  1 +
     xarray/tests/test_backends.py         |  7 ++++---
     xarray/tests/test_coding_times.py     |  6 ++++--
     xarray/tests/test_dataarray.py        | 10 +++++-----
     25 files changed, 41 insertions(+), 34 deletions(-)
    
    diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
    index c9c0b720c35..15c971907f6 100644
    --- a/.github/PULL_REQUEST_TEMPLATE.md
    +++ b/.github/PULL_REQUEST_TEMPLATE.md
    @@ -2,6 +2,6 @@
     
      - [ ] Closes #xxxx
      - [ ] Tests added
    - - [ ] Passes `isort -rc . && black . && mypy . && flake8`
    + - [ ] Passes `isort . && black . && mypy . && flake8`
      - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst`
      - [ ] New functions/methods are listed in `api.rst`
    diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
    index 447f0007fc2..9fd92a50c16 100644
    --- a/.pre-commit-config.yaml
    +++ b/.pre-commit-config.yaml
    @@ -2,10 +2,9 @@
     repos:
       # isort should run before black as black sometimes tweaks the isort output
       - repo: https://github.com/timothycrosley/isort
    -    rev: 4.3.21-2
    +    rev: 5.1.0
         hooks:
           - id: isort
    -        files: .+\.py$
       # https://github.com/python/black#version-control-integration
       - repo: https://github.com/python/black
         rev: stable
    @@ -16,7 +15,7 @@ repos:
         hooks:
           - id: blackdoc
       - repo: https://gitlab.com/pycqa/flake8
    -    rev: 3.7.9
    +    rev: 3.8.3
         hooks:
           - id: flake8
       - repo: https://github.com/pre-commit/mirrors-mypy
    diff --git a/azure-pipelines.yml b/azure-pipelines.yml
    index e04c8f74f68..8061c9895ca 100644
    --- a/azure-pipelines.yml
    +++ b/azure-pipelines.yml
    @@ -95,7 +95,7 @@ jobs:
       - template: ci/azure/install.yml
       - bash: |
           source activate xarray-tests
    -      isort -rc --check .
    +      isort --check .
         displayName: isort formatting checks
     
     - job: MinimumVersionsPolicy
    diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
    index b14582ca9c2..c11c52bd19f 100644
    --- a/ci/requirements/py36-min-all-deps.yml
    +++ b/ci/requirements/py36-min-all-deps.yml
    @@ -23,7 +23,7 @@ dependencies:
       - hdf5=1.10
       - hypothesis
       - iris=2.2
    -  - isort=4.3.21
    +  - isort
       - lxml=4.4  # Optional dep of pydap
       - matplotlib=3.1
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
    index 9ff2c6c49ca..a500173f277 100644
    --- a/ci/requirements/py36.yml
    +++ b/ci/requirements/py36.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort=4.3.21
    +  - isort
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
    index 19285a35eca..e9e5c7a900a 100644
    --- a/ci/requirements/py37-windows.yml
    +++ b/ci/requirements/py37-windows.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort=4.3.21
    +  - isort
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
    index 3fcb4efd009..dba3926596e 100644
    --- a/ci/requirements/py37.yml
    +++ b/ci/requirements/py37.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort=4.3.21
    +  - isort
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
    index 4e6f0dd5387..a375d9e1e5a 100644
    --- a/ci/requirements/py38-all-but-dask.yml
    +++ b/ci/requirements/py38-all-but-dask.yml
    @@ -16,7 +16,7 @@ dependencies:
       - h5py
       - hdf5
       - hypothesis
    -  - isort=4.3.21
    +  - isort
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.761  # Must match .pre-commit-config.yaml
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 4598fcd2790..7dff3a1bd97 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -19,7 +19,7 @@ dependencies:
       - hdf5
       - hypothesis
       - iris
    -  - isort=4.3.21
    +  - isort
       - lxml    # Optional dep of pydap
       - matplotlib
       - mypy=0.780  # Must match .pre-commit-config.yaml
    diff --git a/conftest.py b/conftest.py
    index 712af1d3759..ddce5e0d593 100644
    --- a/conftest.py
    +++ b/conftest.py
    @@ -27,6 +27,7 @@ def pytest_runtest_setup(item):
     def add_standard_imports(doctest_namespace):
         import numpy as np
         import pandas as pd
    +
         import xarray as xr
     
         doctest_namespace["np"] = np
    diff --git a/doc/contributing.rst b/doc/contributing.rst
    index 9e6a3c250e9..975f4e67ba2 100644
    --- a/doc/contributing.rst
    +++ b/doc/contributing.rst
    @@ -368,7 +368,7 @@ xarray uses several tools to ensure a consistent code format throughout the proj
     
     and then run from the root of the Xarray repository::
     
    -   isort -rc .
    +   isort .
        black -t py36 .
        blackdoc -t py36 .
        flake8
    diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
    index 9863285d6de..b7d91a840fe 100644
    --- a/xarray/backends/scipy_.py
    +++ b/xarray/backends/scipy_.py
    @@ -57,9 +57,10 @@ def __setitem__(self, key, value):
     
     
     def _open_scipy_netcdf(filename, mode, mmap, version):
    -    import scipy.io
         import gzip
     
    +    import scipy.io
    +
         # if the string ends with .gz, then gunzip and open as netcdf file
         if isinstance(filename, str) and filename.endswith(".gz"):
             try:
    diff --git a/xarray/conventions.py b/xarray/conventions.py
    index fc0572944f3..700dcbc0fc4 100644
    --- a/xarray/conventions.py
    +++ b/xarray/conventions.py
    @@ -567,8 +567,8 @@ def decode_cf(
         -------
         decoded : Dataset
         """
    -    from .core.dataset import Dataset
         from .backends.common import AbstractDataStore
    +    from .core.dataset import Dataset
     
         if isinstance(obj, Dataset):
             vars = obj._variables
    diff --git a/xarray/convert.py b/xarray/convert.py
    index 0c86b090f34..395581bace7 100644
    --- a/xarray/convert.py
    +++ b/xarray/convert.py
    @@ -254,6 +254,7 @@ def from_iris(cube):
         """ Convert a Iris cube into an DataArray
         """
         import iris.exceptions
    +
         from xarray.core.pycompat import dask_array_type
     
         name = _name(cube)
    diff --git a/xarray/core/common.py b/xarray/core/common.py
    index 67dc0fda461..c95df77313e 100644
    --- a/xarray/core/common.py
    +++ b/xarray/core/common.py
    @@ -1088,9 +1088,9 @@ def resample(
             """
             # TODO support non-string indexer after removing the old API.
     
    +        from ..coding.cftimeindex import CFTimeIndex
             from .dataarray import DataArray
             from .resample import RESAMPLE_DIM
    -        from ..coding.cftimeindex import CFTimeIndex
     
             if keep_attrs is None:
                 keep_attrs = _get_keep_attrs(default=False)
    @@ -1283,8 +1283,8 @@ def isin(self, test_elements):
             numpy.isin
             """
             from .computation import apply_ufunc
    -        from .dataset import Dataset
             from .dataarray import DataArray
    +        from .dataset import Dataset
             from .variable import Variable
     
             if isinstance(test_elements, Dataset):
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index d8a0c53e817..94d4c6b1540 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -976,8 +976,8 @@ def earth_mover_distance(first_samples,
         .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
         .. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation
         """
    -    from .groupby import GroupBy
         from .dataarray import DataArray
    +    from .groupby import GroupBy
         from .variable import Variable
     
         if input_core_dims is None:
    diff --git a/xarray/core/concat.py b/xarray/core/concat.py
    index 7741cbb826b..b42c91c232d 100644
    --- a/xarray/core/concat.py
    +++ b/xarray/core/concat.py
    @@ -116,8 +116,8 @@ def concat(
         # TODO: add ignore_index arguments copied from pandas.concat
         # TODO: support concatenating scalar coordinates even if the concatenated
         # dimension already exists
    -    from .dataset import Dataset
         from .dataarray import DataArray
    +    from .dataset import Dataset
     
         try:
             first_obj, objs = utils.peek_at(objs)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 5bfddaa710b..1b0e01914f2 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -4144,7 +4144,7 @@ def interpolate_na(
             numpy.interp
             scipy.interpolate
             """
    -        from .missing import interp_na, _apply_over_vars_with_dim
    +        from .missing import _apply_over_vars_with_dim, interp_na
     
             new = _apply_over_vars_with_dim(
                 interp_na,
    @@ -4178,7 +4178,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "Dataset":
             -------
             Dataset
             """
    -        from .missing import ffill, _apply_over_vars_with_dim
    +        from .missing import _apply_over_vars_with_dim, ffill
     
             new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit)
             return new
    @@ -4203,7 +4203,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "Dataset":
             -------
             Dataset
             """
    -        from .missing import bfill, _apply_over_vars_with_dim
    +        from .missing import _apply_over_vars_with_dim, bfill
     
             new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit)
             return new
    diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
    index 04c0fabae6a..aa7aa1f5e86 100644
    --- a/xarray/core/groupby.py
    +++ b/xarray/core/groupby.py
    @@ -64,8 +64,8 @@ def unique_value_groups(ar, sort=True):
     
     
     def _dummy_copy(xarray_obj):
    -    from .dataset import Dataset
         from .dataarray import DataArray
    +    from .dataset import Dataset
     
         if isinstance(xarray_obj, Dataset):
             res = Dataset(
    diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
    index ab049a0a4b4..28ed2cfb16f 100644
    --- a/xarray/core/indexing.py
    +++ b/xarray/core/indexing.py
    @@ -50,8 +50,8 @@ def _expand_slice(slice_, size):
     
     
     def _sanitize_slice_element(x):
    -    from .variable import Variable
         from .dataarray import DataArray
    +    from .variable import Variable
     
         if isinstance(x, (Variable, DataArray)):
             x = x.values
    diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
    index f9989c2c8c9..41c8d258d7a 100644
    --- a/xarray/core/nanops.py
    +++ b/xarray/core/nanops.py
    @@ -6,6 +6,7 @@
     
     try:
         import dask.array as dask_array
    +
         from . import dask_array_compat
     except ImportError:
         dask_array = None
    @@ -118,7 +119,7 @@ def nansum(a, axis=None, dtype=None, out=None, min_count=None):
     
     def _nanmean_ddof_object(ddof, value, axis=None, dtype=None, **kwargs):
         """ In house nanmean. ddof argument will be used in _nanvar method """
    -    from .duck_array_ops import count, fillna, _dask_or_eager_func, where_method
    +    from .duck_array_ops import _dask_or_eager_func, count, fillna, where_method
     
         valid_count = count(value, axis=axis)
         value = fillna(value, 0)
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index 86044e72dd2..07d61e595c9 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -2,6 +2,7 @@
         import dask
         import dask.array
         from dask.highlevelgraph import HighLevelGraph
    +
         from .dask_array_compat import meta_from_array
     
     except ImportError:
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 6a840e6303e..9f987e7fdf2 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -3207,7 +3207,7 @@ def test_load_dataarray(self):
     @pytest.mark.filterwarnings("ignore:The binary mode of fromstring is deprecated")
     class TestPydap:
         def convert_to_pydap_dataset(self, original):
    -        from pydap.model import GridType, BaseType, DatasetType
    +        from pydap.model import BaseType, DatasetType, GridType
     
             ds = DatasetType("bears", **original.attrs)
             for key, var in original.data_vars.items():
    @@ -3747,9 +3747,10 @@ def test_platecarree(self):
     
         def test_notransform(self):
             # regression test for https://github.com/pydata/xarray/issues/1686
    -        import rasterio
             import warnings
     
    +        import rasterio
    +
             # Create a geotiff file
             with warnings.catch_warnings():
                 # rasterio throws a NotGeoreferencedWarning here, which is
    @@ -4097,8 +4098,8 @@ def test_rasterio_vrt_with_transform_and_size(self):
             # Test open_rasterio() support of WarpedVRT with transform, width and
             # height (issue #2864)
             import rasterio
    -        from rasterio.warp import calculate_default_transform
             from affine import Affine
    +        from rasterio.warp import calculate_default_transform
     
             with create_tmp_geotiff() as (tmp_file, expected):
                 with rasterio.open(tmp_file) as src:
    diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
    index 1efd4b02bf8..457e68f5593 100644
    --- a/xarray/tests/test_coding_times.py
    +++ b/xarray/tests/test_coding_times.py
    @@ -222,9 +222,10 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar):
     @requires_cftime
     @pytest.mark.parametrize("calendar", _ALL_CALENDARS)
     def test_decode_dates_outside_timestamp_range(calendar):
    -    import cftime
         from datetime import datetime
     
    +    import cftime
    +
         units = "days since 0001-01-01"
         times = [datetime(1, 4, 1, h) for h in range(1, 5)]
         time = cftime.date2num(times, units, calendar=calendar)
    @@ -358,9 +359,10 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calend
     @requires_cftime
     @pytest.mark.parametrize("calendar", _ALL_CALENDARS)
     def test_decode_multidim_time_outside_timestamp_range(calendar):
    -    import cftime
         from datetime import datetime
     
    +    import cftime
    +
         units = "days since 0001-01-01"
         times1 = [datetime(1, 4, day) for day in range(1, 6)]
         times2 = [datetime(1, 5, day) for day in range(1, 6)]
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index 793090cc122..e0da3f1527f 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -6417,8 +6417,8 @@ def test_name_in_masking():
     class TestIrisConversion:
         @requires_iris
         def test_to_and_from_iris(self):
    -        import iris
             import cf_units  # iris requirement
    +        import iris
     
             # to iris
             coord_dict = {}
    @@ -6488,9 +6488,9 @@ def test_to_and_from_iris(self):
         @requires_iris
         @requires_dask
         def test_to_and_from_iris_dask(self):
    +        import cf_units  # iris requirement
             import dask.array as da
             import iris
    -        import cf_units  # iris requirement
     
             coord_dict = {}
             coord_dict["distance"] = ("distance", [-2, 2], {"units": "meters"})
    @@ -6623,8 +6623,8 @@ def test_da_name_from_cube(self, std_name, long_name, var_name, name, attrs):
             ],
         )
         def test_da_coord_name_from_cube(self, std_name, long_name, var_name, name, attrs):
    -        from iris.cube import Cube
             from iris.coords import DimCoord
    +        from iris.cube import Cube
     
             latitude = DimCoord(
                 [-90, 0, 90], standard_name=std_name, var_name=var_name, long_name=long_name
    @@ -6637,8 +6637,8 @@ def test_da_coord_name_from_cube(self, std_name, long_name, var_name, name, attr
     
         @requires_iris
         def test_prevent_duplicate_coord_names(self):
    -        from iris.cube import Cube
             from iris.coords import DimCoord
    +        from iris.cube import Cube
     
             # Iris enforces unique coordinate names. Because we use a different
             # name resolution order a valid iris Cube with coords that have the
    @@ -6659,8 +6659,8 @@ def test_prevent_duplicate_coord_names(self):
             [["IA", "IL", "IN"], [0, 2, 1]],  # non-numeric values  # non-monotonic values
         )
         def test_fallback_to_iris_AuxCoord(self, coord_values):
    -        from iris.cube import Cube
             from iris.coords import AuxCoord
    +        from iris.cube import Cube
     
             data = [0, 0, 0]
             da = xr.DataArray(data, coords=[coord_values], dims=["space"])
    
    From a081d01df11610adea7a48acee5a71d9eb5ffd16 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Wed, 22 Jul 2020 23:17:58 +0200
    Subject: [PATCH 088/342] fix the RTD timeouts (#4254)
    
    * try adding a :okwarning: option
    
    * ignore more warnings
    
    * ignore even more warnings
    ---
     doc/plotting.rst | 45 ++++++++++++++++++++++++++++++++++++++++++++-
     1 file changed, 44 insertions(+), 1 deletion(-)
    
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index 02ddba1e00c..3903ea5cde9 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -99,6 +99,7 @@ One Dimension
     The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method.
     
     .. ipython:: python
    +    :okwarning:
     
         air1d = air.isel(lat=10, lon=10)
     
    @@ -125,6 +126,7 @@ can be used:
     .. _matplotlib.pyplot.plot: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_1d_additional_args.png width=4in
         air1d[:200].plot.line("b-^")
    @@ -137,6 +139,7 @@ can be used:
     Keyword arguments work the same way, and are more explicit.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_example_sin3.png width=4in
         air1d[:200].plot.line(color="purple", marker="o")
    @@ -151,6 +154,7 @@ In this example ``axes`` is an array consisting of the left and right
     axes created by ``plt.subplots``.
     
     .. ipython:: python
    +    :okwarning:
     
         fig, axes = plt.subplots(ncols=2)
     
    @@ -178,6 +182,7 @@ support the ``aspect`` and ``size`` arguments which control the size of the
     resulting image via the formula ``figsize = (aspect * size, size)``:
     
     .. ipython:: python
    +    :okwarning:
     
         air1d.plot(aspect=2, size=3)
         @savefig plotting_example_size_and_aspect.png
    @@ -219,6 +224,7 @@ without coordinates along the x-axis. To illustrate this, let's calculate a 'dec
     from the time and assign it as a non-dimension coordinate:
     
     .. ipython:: python
    +    :okwarning:
     
         decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d")
         air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day))
    @@ -227,6 +233,7 @@ from the time and assign it as a non-dimension coordinate:
     To use ``'decimal_day'`` as x coordinate it must be explicitly specified:
     
     .. ipython:: python
    +    :okwarning:
     
         air1d_multi.plot(x="decimal_day")
     
    @@ -234,6 +241,7 @@ Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``
     it is also possible to use a MultiIndex level as x-axis:
     
     .. ipython:: python
    +    :okwarning:
     
         air1d_multi = air1d_multi.set_index(date=("time", "decimal_day"))
         air1d_multi.plot(x="decimal_day")
    @@ -241,6 +249,7 @@ it is also possible to use a MultiIndex level as x-axis:
     Finally, if a dataset does not have any coordinates it enumerates all data points:
     
     .. ipython:: python
    +    :okwarning:
     
         air1d_multi = air1d_multi.drop("date")
         air1d_multi.plot()
    @@ -256,6 +265,7 @@ with appropriate arguments. Consider the 3D variable ``air`` defined above. We c
     plots to check the variation of air temperature at three different latitudes along a longitude line:
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_example_multiple_lines_x_kwarg.png
         air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time")
    @@ -277,6 +287,7 @@ If required, the automatic legend can be turned off using ``add_legend=False``.
     It is also possible to make line plots such that the data are on the x-axis and a dimension is on the y-axis. This can be done by specifying the appropriate ``y`` keyword argument.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_example_xy_kwarg.png
         air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon")
    @@ -299,6 +310,7 @@ The argument ``where`` defines where the steps should be placed, options are
     when plotting data grouped with :py:meth:`Dataset.groupby_bins`.
     
     .. ipython:: python
    +    :okwarning:
     
         air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90])
         air_mean = air_grp.mean()
    @@ -321,6 +333,7 @@ Other axes kwargs
     The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes direction.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_example_xincrease_yincrease_kwarg.png
         air.isel(time=10, lon=[10, 11]).plot.line(
    @@ -340,6 +353,7 @@ Two Dimensions
     The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional.
     
     .. ipython:: python
    +    :okwarning:
     
         air2d = air.isel(time=500)
     
    @@ -350,6 +364,7 @@ All 2d plots in xarray allow the use of the keyword arguments ``yincrease``
     and ``xincrease``.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig 2d_simple_yincrease.png width=4in
         air2d.plot(yincrease=False)
    @@ -369,6 +384,7 @@ and ``xincrease``.
     xarray plots data with :ref:`missing_values`.
     
     .. ipython:: python
    +    :okwarning:
     
         bad_air2d = air2d.copy()
     
    @@ -386,6 +402,7 @@ It's not necessary for the coordinates to be evenly spaced. Both
     produce plots with nonuniform coordinates.
     
     .. ipython:: python
    +    :okwarning:
     
         b = air2d.copy()
         # Apply a nonlinear transformation to one of the coords
    @@ -402,6 +419,7 @@ Since this is a thin wrapper around matplotlib, all the functionality of
     matplotlib is available.
     
     .. ipython:: python
    +    :okwarning:
     
         air2d.plot(cmap=plt.cm.Blues)
         plt.title("These colors prove North America\nhas fallen in the ocean")
    @@ -421,6 +439,7 @@ matplotlib is available.
         ``d_ylog.plot()`` updates the xlabel.
     
         .. ipython:: python
    +        :okwarning:
     
             plt.xlabel("Never gonna see this.")
             air2d.plot()
    @@ -436,6 +455,7 @@ xarray borrows logic from Seaborn to infer what kind of color map to use. For
     example, consider the original data in Kelvins rather than Celsius:
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_kelvin.png width=4in
         airtemps.air.isel(time=0).plot()
    @@ -454,6 +474,7 @@ Here we add two bad data points. This affects the color scale,
     washing out the plot.
     
     .. ipython:: python
    +    :okwarning:
     
         air_outliers = airtemps.air.isel(time=0).copy()
         air_outliers[0, 0] = 100
    @@ -469,6 +490,7 @@ This will use the 2nd and 98th
     percentiles of the data to compute the color limits.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_robust2.png width=4in
         air_outliers.plot(robust=True)
    @@ -487,6 +509,7 @@ rather than the default continuous colormaps that matplotlib uses. The
     colormaps. For example, to make a plot with 8 discrete color intervals:
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_discrete_levels.png width=4in
         air2d.plot(levels=8)
    @@ -495,6 +518,7 @@ It is also possible to use a list of levels to specify the boundaries of the
     discrete colormap:
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_listed_levels.png width=4in
         air2d.plot(levels=[0, 12, 18, 30])
    @@ -502,6 +526,7 @@ discrete colormap:
     You can also specify a list of discrete colors through the ``colors`` argument:
     
     .. ipython:: python
    +    :okwarning:
     
         flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
         @savefig plotting_custom_colors_levels.png width=4in
    @@ -559,6 +584,7 @@ arguments to the xarray plotting methods/functions. This returns a
     :py:class:`xarray.plot.FacetGrid` object.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plot_facet_dataarray.png
         g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3)
    @@ -566,6 +592,7 @@ arguments to the xarray plotting methods/functions. This returns a
     Faceting also works for line plots.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plot_facet_dataarray_line.png
         g_simple_line = t.isel(lat=slice(0, None, 4)).plot(
    @@ -582,6 +609,7 @@ a fixed amount. Now we can see how the temperature maps would compare if
     one were much hotter.
     
     .. ipython:: python
    +    :okwarning:
     
         t2 = t.isel(time=slice(0, 2))
         t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim"))
    @@ -603,6 +631,7 @@ Faceted plotting supports other arguments common to xarray 2d plots.
         plt.close("all")
     
     .. ipython:: python
    +    :okwarning:
     
         hasoutliers = t.isel(time=slice(0, 5)).copy()
         hasoutliers[0, 0, 0] = -100
    @@ -649,6 +678,7 @@ Here is an example of using the lower level API and then modifying the axes afte
     they have been plotted.
     
     .. ipython:: python
    +    :okwarning:
     
         g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True)
     
    @@ -688,6 +718,7 @@ Consider this dataset
     Suppose we want to scatter ``A`` against ``B``
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig ds_simple_scatter.png
         ds.plot.scatter(x="A", y="B")
    @@ -695,6 +726,7 @@ Suppose we want to scatter ``A`` against ``B``
     The ``hue`` kwarg lets you vary the color by variable value
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig ds_hue_scatter.png
         ds.plot.scatter(x="A", y="B", hue="w")
    @@ -705,6 +737,7 @@ You can force a legend instead of a colorbar by setting ``hue_style='discrete'``
     Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display of a legend or colorbar (as appropriate).
     
     .. ipython:: python
    +    :okwarning:
     
         ds = ds.assign(w=[1, 2, 3, 5])
         @savefig ds_discrete_legend_hue_scatter.png
    @@ -713,6 +746,7 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display
     The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig ds_hue_size_scatter.png
         ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z")
    @@ -720,6 +754,7 @@ The ``markersize`` kwarg lets you vary the point's size by variable value. You c
     Faceting is also possible
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig ds_facet_scatter.png
         ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete")
    @@ -738,6 +773,7 @@ To follow this section you'll need to have Cartopy installed and working.
     This script will plot the air temperature on a map.
     
     .. ipython:: python
    +    :okwarning:
     
         import cartopy.crs as ccrs
     
    @@ -745,7 +781,8 @@ This script will plot the air temperature on a map.
     
         p = air.isel(time=0).plot(
             subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"),
    -        transform=ccrs.PlateCarree())
    +        transform=ccrs.PlateCarree(),
    +    )
         p.axes.set_global()
     
         @savefig plotting_maps_cartopy.png width=100%
    @@ -788,6 +825,7 @@ There are three ways to use the xarray plotting functionality:
     These are provided for user convenience; they all call the same code.
     
     .. ipython:: python
    +    :okwarning:
     
         import xarray.plot as xplt
     
    @@ -837,6 +875,7 @@ think carefully about what the limits, labels, and orientation for
     each of the axes should be.
     
     .. ipython:: python
    +    :okwarning:
     
         @savefig plotting_example_2d_simple.png width=4in
         a.plot()
    @@ -857,6 +896,7 @@ xarray, but you'll have to tell the plot function to use these coordinates
     instead of the default ones:
     
     .. ipython:: python
    +    :okwarning:
     
         lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4))
         lon += lat / 10
    @@ -876,6 +916,7 @@ on a polar projection (:issue:`781`). This is why the default is to not follow
     this convention when plotting on a map:
     
     .. ipython:: python
    +    :okwarning:
     
         import cartopy.crs as ccrs
     
    @@ -890,6 +931,7 @@ You can however decide to infer the cell boundaries and use the
     ``infer_intervals`` keyword:
     
     .. ipython:: python
    +    :okwarning:
     
         ax = plt.subplot(projection=ccrs.PlateCarree())
         da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True)
    @@ -908,6 +950,7 @@ You can however decide to infer the cell boundaries and use the
     One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name.
     
     .. ipython:: python
    +    :okwarning:
     
         f, ax = plt.subplots(2, 1)
         da.plot.line(x="lon", hue="y", ax=ax[0])
    
    From 0b706a428208305372553e354d08086ab23da3df Mon Sep 17 00:00:00 2001
    From: Aaron Spring 
    Date: Thu, 23 Jul 2020 12:42:28 +0200
    Subject: [PATCH 089/342] CFTimeIndex calendar in repr (#4092)
    
    Also adds a calendar property to CFTimeIndex
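
For orientation (not part of the patch itself), a minimal sketch of how the new
``calendar`` property and the pandas-style repr are exercised, based on the tests
added below; it requires ``cftime`` and the output shown in the comments is illustrative:

```python
import xarray as xr

index = xr.cftime_range(start="2000", periods=2, calendar="noleap")

print(index.calendar)
# noleap  -- inferred from the underlying cftime dates

print(repr(index))
# CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00],
#             dtype='object', length=2, calendar='noleap')
```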
    ---
     doc/whats-new.rst                |   5 +-
     xarray/coding/cftimeindex.py     |  95 +++++++++++++++++++++++++
     xarray/tests/test_cftimeindex.py | 115 +++++++++++++++++++++++++++++++
     3 files changed, 214 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d086d4f411d..e109633a5e1 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -25,6 +25,10 @@ Breaking changes
     
     New Features
     ~~~~~~~~~~~~
+- Build :py:meth:`CFTimeIndex.__repr__` explicitly, modeled on :py:class:`pandas.Index`. Add ``calendar`` as a new
+  property for :py:class:`CFTimeIndex` and show ``calendar`` and ``length`` in
+  :py:meth:`CFTimeIndex.__repr__` (:issue:`2416`, :pull:`4092`)
+  By `Aaron Spring `_.
     
     
     Bug fixes
    @@ -173,7 +177,6 @@ Enhancements
       (:pull:`3905`)
       By `Maximilian Roos `_
     
    -
     Bug fixes
     ~~~~~~~~~
     - Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`)
    diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
    index 2a7eaa99edb..cd57af5c7eb 100644
    --- a/xarray/coding/cftimeindex.py
    +++ b/xarray/coding/cftimeindex.py
    @@ -50,8 +50,14 @@
     from xarray.core.utils import is_scalar
     
     from ..core.common import _contains_cftime_datetimes
    +from ..core.options import OPTIONS
     from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name
     
    +# constants for cftimeindex.repr
    +CFTIME_REPR_LENGTH = 19
    +ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS = 100
    +REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10
    +
     
     def named(name, pattern):
         return "(?P<" + name + ">" + pattern + ")"
    @@ -215,6 +221,48 @@ def assert_all_valid_date_type(data):
                 )
     
     
    +def format_row(times, indent=0, separator=", ", row_end=",\n"):
    +    """Format a single row from format_times."""
    +    return indent * " " + separator.join(map(str, times)) + row_end
    +
    +
    +def format_times(
    +    index,
    +    max_width,
    +    offset,
    +    separator=", ",
    +    first_row_offset=0,
    +    intermediate_row_end=",\n",
    +    last_row_end="",
    +):
    +    """Format values of cftimeindex as pd.Index."""
    +    n_per_row = max(max_width // (CFTIME_REPR_LENGTH + len(separator)), 1)
    +    n_rows = int(np.ceil(len(index) / n_per_row))
    +
    +    representation = ""
    +    for row in range(n_rows):
    +        indent = first_row_offset if row == 0 else offset
    +        row_end = last_row_end if row == n_rows - 1 else intermediate_row_end
    +        times_for_row = index[row * n_per_row : (row + 1) * n_per_row]
    +        representation = representation + format_row(
    +            times_for_row, indent=indent, separator=separator, row_end=row_end
    +        )
    +
    +    return representation
    +
    +
    +def format_attrs(index, separator=", "):
    +    """Format attributes of CFTimeIndex for __repr__."""
    +    attrs = {
    +        "dtype": f"'{index.dtype}'",
    +        "length": f"{len(index)}",
    +        "calendar": f"'{index.calendar}'",
    +    }
    +    attrs_str = [f"{k}={v}" for k, v in attrs.items()]
    +    attrs_str = f"{separator}".join(attrs_str)
    +    return attrs_str
    +
    +
     class CFTimeIndex(pd.Index):
         """Custom Index for working with CF calendars and dates
     
    @@ -259,6 +307,46 @@ def __new__(cls, data, name=None):
             result._cache = {}
             return result
     
    +    def __repr__(self):
    +        """
    +        Return a string representation for this object.
    +        """
    +        klass_name = type(self).__name__
    +        display_width = OPTIONS["display_width"]
    +        offset = len(klass_name) + 2
    +
    +        if len(self) <= ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS:
    +            datastr = format_times(
    +                self.values, display_width, offset=offset, first_row_offset=0
    +            )
    +        else:
    +            front_str = format_times(
    +                self.values[:REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END],
    +                display_width,
    +                offset=offset,
    +                first_row_offset=0,
    +                last_row_end=",",
    +            )
    +            end_str = format_times(
    +                self.values[-REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END:],
    +                display_width,
    +                offset=offset,
    +                first_row_offset=offset,
    +            )
    +            datastr = "\n".join([front_str, f"{' '*offset}...", end_str])
    +
    +        attrs_str = format_attrs(self)
    +        # oneliner only if smaller than display_width
    +        full_repr_str = f"{klass_name}([{datastr}], {attrs_str})"
    +        if len(full_repr_str) <= display_width:
    +            return full_repr_str
    +        else:
    +            # if attrs_str too long, one per line
    +            if len(attrs_str) >= display_width - offset:
    +                attrs_str = attrs_str.replace(",", f",\n{' '*(offset-2)}")
    +            full_repr_str = f"{klass_name}([{datastr}],\n{' '*(offset-1)}{attrs_str})"
    +            return full_repr_str
    +
         def _partial_date_slice(self, resolution, parsed):
             """Adapted from
             pandas.tseries.index.DatetimeIndex._partial_date_slice
    @@ -582,6 +670,13 @@ def asi8(self):
                 dtype=np.int64,
             )
     
    +    @property
    +    def calendar(self):
    +        """The calendar used by the datetimes in the index."""
    +        from .times import infer_calendar_name
    +
    +        return infer_calendar_name(self)
    +
         def _round_via_method(self, freq, method):
             """Round dates using a specified method."""
             from .cftime_offsets import CFTIME_TICKS, to_offset
    diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
    index 745ae341370..642609ba059 100644
    --- a/xarray/tests/test_cftimeindex.py
    +++ b/xarray/tests/test_cftimeindex.py
    @@ -1,4 +1,5 @@
     from datetime import timedelta
    +from textwrap import dedent
     
     import numpy as np
     import pandas as pd
    @@ -884,6 +885,120 @@ def test_cftimeindex_shift_invalid_freq():
             index.shift(1, 1)
     
     
    +@requires_cftime
    +@pytest.mark.parametrize(
    +    ("calendar", "expected"),
    +    [
    +        ("noleap", "noleap"),
    +        ("365_day", "noleap"),
    +        ("360_day", "360_day"),
    +        ("julian", "julian"),
    +        ("gregorian", "gregorian"),
    +        ("proleptic_gregorian", "proleptic_gregorian"),
    +    ],
    +)
    +def test_cftimeindex_calendar_property(calendar, expected):
    +    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    +    assert index.calendar == expected
    +
    +
    +@requires_cftime
    +@pytest.mark.parametrize(
    +    ("calendar", "expected"),
    +    [
    +        ("noleap", "noleap"),
    +        ("365_day", "noleap"),
    +        ("360_day", "360_day"),
    +        ("julian", "julian"),
    +        ("gregorian", "gregorian"),
    +        ("proleptic_gregorian", "proleptic_gregorian"),
    +    ],
    +)
    +def test_cftimeindex_calendar_repr(calendar, expected):
    +    """Test that cftimeindex has calendar property in repr."""
    +    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    +    repr_str = index.__repr__()
    +    assert f" calendar='{expected}'" in repr_str
    +    assert "2000-01-01 00:00:00, 2000-01-02 00:00:00" in repr_str
    +
    +
    +@requires_cftime
    +@pytest.mark.parametrize("periods", [2, 40])
    +def test_cftimeindex_periods_repr(periods):
    +    """Test that cftimeindex has periods property in repr."""
    +    index = xr.cftime_range(start="2000", periods=periods)
    +    repr_str = index.__repr__()
    +    assert f" length={periods}" in repr_str
    +
    +
    +@requires_cftime
    +@pytest.mark.parametrize(
    +    "periods,expected",
    +    [
    +        (
    +            2,
    +            """\
    +CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00],
    +            dtype='object', length=2, calendar='gregorian')""",
    +        ),
    +        (
    +            4,
    +            """\
    +CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00, 2000-01-03 00:00:00,
    +             2000-01-04 00:00:00],
    +            dtype='object', length=4, calendar='gregorian')""",
    +        ),
    +        (
    +            101,
    +            """\
    +CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00, 2000-01-03 00:00:00,
    +             2000-01-04 00:00:00, 2000-01-05 00:00:00, 2000-01-06 00:00:00,
    +             2000-01-07 00:00:00, 2000-01-08 00:00:00, 2000-01-09 00:00:00,
    +             2000-01-10 00:00:00,
    +             ...
    +             2000-04-01 00:00:00, 2000-04-02 00:00:00, 2000-04-03 00:00:00,
    +             2000-04-04 00:00:00, 2000-04-05 00:00:00, 2000-04-06 00:00:00,
    +             2000-04-07 00:00:00, 2000-04-08 00:00:00, 2000-04-09 00:00:00,
    +             2000-04-10 00:00:00],
    +            dtype='object', length=101, calendar='gregorian')""",
    +        ),
    +    ],
    +)
    +def test_cftimeindex_repr_formatting(periods, expected):
    +    """Test that cftimeindex.__repr__ is formatted similar to pd.Index.__repr__."""
    +    index = xr.cftime_range(start="2000", periods=periods)
    +    expected = dedent(expected)
    +    assert expected == repr(index)
    +
    +
    +@requires_cftime
    +@pytest.mark.parametrize("display_width", [40, 80, 100])
    +@pytest.mark.parametrize("periods", [2, 3, 4, 100, 101])
    +def test_cftimeindex_repr_formatting_width(periods, display_width):
    +    """Test that cftimeindex is sensitive to OPTIONS['display_width']."""
    +    index = xr.cftime_range(start="2000", periods=periods)
    +    len_intro_str = len("CFTimeIndex(")
    +    with xr.set_options(display_width=display_width):
    +        repr_str = index.__repr__()
    +        splitted = repr_str.split("\n")
    +        for i, s in enumerate(splitted):
    +            # check that lines not longer than OPTIONS['display_width']
    +            assert len(s) <= display_width, f"{len(s)} {s} {display_width}"
    +            if i > 0:
    +                # check for initial spaces
    +                assert s[:len_intro_str] == " " * len_intro_str
    +
    +
    +@requires_cftime
    +@pytest.mark.parametrize("periods", [22, 50, 100])
    +def test_cftimeindex_repr_101_shorter(periods):
    +    index_101 = xr.cftime_range(start="2000", periods=101)
    +    index_periods = xr.cftime_range(start="2000", periods=periods)
    +    index_101_repr_str = index_101.__repr__()
    +    index_periods_repr_str = index_periods.__repr__()
    +    assert len(index_101_repr_str) < len(index_periods_repr_str)
    +
    +
     @requires_cftime
     def test_parse_array_of_cftime_strings():
         from cftime import DatetimeNoLeap
    
    From 4e893317240ed1a80e65ea2de107e9179bb65446 Mon Sep 17 00:00:00 2001
    From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    Date: Thu, 23 Jul 2020 11:26:39 -0400
    Subject: [PATCH 090/342] Add release summary, some touch-ups (#4217)
    
    * Add release summary, some touch-ups
    
    * Add Twitter
    
    * Touch up whatsnew entry
    
    * @keewis suggestions
    ---
     HOW_TO_RELEASE.md | 84 +++++++++++++++++++++++++++--------------------
     doc/whats-new.rst |  6 ++--
     2 files changed, 52 insertions(+), 38 deletions(-)
    
    diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md
    index c890d61d966..24995a389c4 100644
    --- a/HOW_TO_RELEASE.md
    +++ b/HOW_TO_RELEASE.md
    @@ -2,78 +2,92 @@
     
     Time required: about an hour.
     
    +These instructions assume that `upstream` refers to the main repository:
    +```
    +$ git remote -v
    +{...}
    +upstream        https://github.com/pydata/xarray (fetch)
    +upstream        https://github.com/pydata/xarray (push)
    +```
    +
      1. Ensure your master branch is synced to upstream:
    -      ```
    -      git pull upstream master
    -      ```
    +     ```sh
    +     git pull upstream master
    +     ```
      2. Get a list of contributors with:
    -    ```
    +    ```sh
         git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /'
         ```
    -    or by substituting the _previous_ release in:
    -    ```
    -    git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /'
    +    or by substituting the _previous_ release in {0.X.Y-1}:
    +    ```sh
    +    git log v{0.X.Y-1}.. --format=%aN | sort -u | perl -pe 's/\n/$1, /'
         ```
         Add these into `whats-new.rst` somewhere :)
    + 2. Write a release summary: ~50 words describing the high level features. This
    +    will be used in the release emails, tweets, GitHub release notes, etc. 
      3. Look over whats-new.rst and the docs. Make sure "What's New" is complete
    -    (check the date!) and consider adding a brief summary note describing the
    -    release at the top.
    +    (check the date!) and add the release summary at the top.
         Things to watch out for:
         - Important new features should be highlighted towards the top.
         - Function/method references should include links to the API docs.
         - Sometimes notes get added in the wrong section of whats-new, typically
           due to a bad merge. Check for these before a release by using git diff,
    -      e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous
    +      e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous
           release.
    + 4. If possible, open a PR with the release summary and whatsnew changes.
    + 4. After merging, again ensure your master branch is synced to upstream:
    +     ```sh
    +     git pull upstream master
    +     ```
      4. If you have any doubts, run the full test suite one final time!
    -      ```
    +      ```sh
           pytest
           ```
      5. Check that the ReadTheDocs build is passing.
      6. On the master branch, commit the release in git:
    -      ```
    -      git commit -am 'Release v0.X.Y'
+      ```sh
    +      git commit -am 'Release v{0.X.Y}'
           ```
      7. Tag the release:
    +      ```sh
    +      git tag -a v{0.X.Y} -m 'v{0.X.Y}'
           ```
    -      git tag -a v0.X.Y -m 'v0.X.Y'
    -      ```
    - 8. Build source and binary wheels for pypi:
    -      ```
    -      git clean -xdf  # this deletes all uncommited changes!
    + 8. Build source and binary wheels for PyPI:
    +      ```sh
    +      git clean -xdf  # this deletes all uncommitted changes!
           python setup.py bdist_wheel sdist
           ```
      9. Use twine to check the package build:
    +      ```sh
    +      twine check dist/xarray-{0.X.Y}*
           ```
    -      twine check dist/xarray-0.X.Y*
    -      ```
    -10. Use twine to register and upload the release on pypi. Be careful, you can't
    +10. Use twine to register and upload the release on PyPI. Be careful, you can't
         take this back!
    -      ```
    -      twine upload dist/xarray-0.X.Y*
    +      ```sh
    +      twine upload dist/xarray-{0.X.Y}*
           ```
         You will need to be listed as a package owner at
         https://pypi.python.org/pypi/xarray for this to work.
     11. Push your changes to master:
    -      ```
    +      ```sh
           git push upstream master
           git push upstream --tags
           ```
     12. Update the stable branch (used by ReadTheDocs) and switch back to master:
    -     ```
    +     ```sh
           git checkout stable
           git rebase master
    -      git push upstream stable
    +      git push --force upstream stable
           git checkout master
          ```
         It's OK to force push to 'stable' if necessary. (We also update the stable 
    -    branch with `git cherrypick` for documentation only fixes that apply the 
+    branch with `git cherry-pick` for documentation-only fixes that apply to the 
         current released version.)
    -13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst:
    +13. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst:
          ```
    -     .. _whats-new.0.X.Y+1:
    +     .. _whats-new.{0.X.Y+1}:
     
    -     v0.X.Y+1 (unreleased)
    +     v{0.X.Y+1} (unreleased)
          ---------------------
     
          Breaking changes
    @@ -96,19 +110,19 @@ Time required: about an hour.
          ~~~~~~~~~~~~~~~~
          ```
     14. Commit your changes and push to master again:
    -      ```
    +      ```sh
           git commit -am 'New whatsnew section'
           git push upstream master
           ```
         You're done pushing to master!
     15. Issue the release on GitHub. Click on "Draft a new release" at
    -    https://github.com/pydata/xarray/releases. Type in the version number, but
    -    don't bother to describe it -- we maintain that on the docs instead.
    +    https://github.com/pydata/xarray/releases. Type in the version number
    +    and paste the release summary in the notes.
     16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/
         and switch your new release tag (at the bottom) from "Inactive" to "Active".
         It should now build automatically.
    -17. Issue the release announcement! For bug fix releases, I usually only email
    -    xarray@googlegroups.com. For major/feature releases, I will email a broader
    +17. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I 
    +    usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader
         list (no more than once every 3-6 months):
           - pydata@googlegroups.com
           - xarray@googlegroups.com
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e109633a5e1..d85fa0dee14 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -117,8 +117,8 @@ New Features
       :py:func:`combine_by_coords` and :py:func:`combine_nested` using
       combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
       By `John Omotani `_
    -- 'missing_dims' argument to :py:meth:`Dataset.isel`,
    -  `:py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing
    +- `missing_dims` argument to :py:meth:`Dataset.isel`,
    +  :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing
       the exception when a dimension passed to ``isel`` is not present with a
       warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`)
       By `John Omotani `_
    @@ -132,7 +132,7 @@ New Features
       By `Stephan Hoyer `_.
     - Allow plotting of boolean arrays. (:pull:`3766`)
       By `Marek Jacob `_
    -- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`).
    +- Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`).
       By `Mathias Hauser `_.
     - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
       the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
    
    From 4293f8040f015da5c44b2b177e9fcb99110a521d Mon Sep 17 00:00:00 2001
    From: Joe Hamman 
    Date: Thu, 23 Jul 2020 09:34:09 -0700
    Subject: [PATCH 091/342] update docs to point to xarray-contrib and
     xarray-tutorial (#4252)
    
    * update docs to point to xarray-contrib and xarray-tutorial
    
    * add link to youtube tutorial
    ---
     doc/examples.rst         | 4 ++--
     doc/index.rst            | 3 +++
     doc/related-projects.rst | 6 ++++--
     3 files changed, 9 insertions(+), 4 deletions(-)
    
    diff --git a/doc/examples.rst b/doc/examples.rst
    index 1d48d29bcc5..102138b6e4e 100644
    --- a/doc/examples.rst
    +++ b/doc/examples.rst
    @@ -2,7 +2,7 @@ Examples
     ========
     
     .. toctree::
    -    :maxdepth: 2
    +    :maxdepth: 1
     
         examples/weather-data
         examples/monthly-means
    @@ -15,7 +15,7 @@ Examples
     Using apply_ufunc
     ------------------
     .. toctree::
    -    :maxdepth: 2
    +    :maxdepth: 1
     
         examples/apply_ufunc_vectorize_1d
     
    diff --git a/doc/index.rst b/doc/index.rst
    index 972eb0a732e..e3cbb331285 100644
    --- a/doc/index.rst
    +++ b/doc/index.rst
    @@ -107,6 +107,7 @@ Documentation
     See also
     --------
     
    +- `Xarray's Tutorial`_ presented at the 2020 SciPy Conference (`video recording`_).
     - Stephan Hoyer and Joe Hamman's `Journal of Open Research Software paper`_ describing the xarray project.
     - The `UW eScience Institute's Geohackweek`_ tutorial on xarray for geospatial data scientists.
     - Stephan Hoyer's `SciPy2015 talk`_ introducing xarray to a general audience.
    @@ -114,6 +115,8 @@ See also
       xarray to users familiar with netCDF.
     - `Nicolas Fauchereau's tutorial`_ on xarray for netCDF users.
     
    +.. _Xarray's Tutorial: https://xarray-contrib.github.io/xarray-tutorial/
    +.. _video recording: https://youtu.be/mecN-Ph_-78
     .. _Journal of Open Research Software paper: http://doi.org/10.5334/jors.148
     .. _UW eScience Institute's Geohackweek : https://geohackweek.github.io/nDarrays/
     .. _SciPy2015 talk: https://www.youtube.com/watch?v=X0pAhJgySxk
    diff --git a/doc/related-projects.rst b/doc/related-projects.rst
    index 8e8e3f63098..ec42ef00246 100644
    --- a/doc/related-projects.rst
    +++ b/doc/related-projects.rst
    @@ -3,9 +3,11 @@
     Xarray related projects
     -----------------------
     
    -Here below is a list of existing open source projects that build
    +Below is a list of existing open source projects that build
     functionality upon xarray. See also section :ref:`internals` for more
    -details on how to build xarray extensions.
    +details on how to build xarray extensions. We also maintain the 
    +`xarray-contrib `_ GitHub organization
    +as a place to curate projects that build upon xarray.
     
     Geosciences
     ~~~~~~~~~~~
    
    From 349c5960f2008099ec99223b005df6552d3f85f9 Mon Sep 17 00:00:00 2001
    From: "James A. Bednar" 
    Date: Thu, 23 Jul 2020 15:42:03 -0500
    Subject: [PATCH 092/342] Added xarrays-spatial and updated geoviews link
     (#4262)
    
    ---
     doc/related-projects.rst | 3 ++-
     1 file changed, 2 insertions(+), 1 deletion(-)
    
    diff --git a/doc/related-projects.rst b/doc/related-projects.rst
    index ec42ef00246..cc780921b34 100644
    --- a/doc/related-projects.rst
    +++ b/doc/related-projects.rst
    @@ -38,6 +38,7 @@ Geosciences
       harmonic wind analysis in Python.
     - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model.
     - `xarray-simlab `_: xarray extension for computer model simulations.
    +- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.)
     - `xarray-topo `_: xarray extension for topographic analysis and modelling.
     - `xbpch `_: xarray interface for bpch files.
     - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask.
    @@ -75,7 +76,7 @@ Extend xarray capabilities
     
     Visualization
     ~~~~~~~~~~~~~
    -- `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data.
    +- `datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data.
     - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews.
     - `psyplot `_: Interactive data visualization with python.
     - `xarray-leaflet `_: An xarray extension for tiled map plotting based on ipyleaflet.
    
    From 6c1203afbbeb25251705a3bf19c7a7bbe5c0bbf4 Mon Sep 17 00:00:00 2001
    From: Sander 
    Date: Fri, 24 Jul 2020 18:09:25 +0200
    Subject: [PATCH 093/342] Removed skipna argument from count, any, all [GH755]
     (#4258)
    
    * Docs: extracted skipna argument docstring for reduce [GH755]
    
    * Added summary to whats-new.rst
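
As context for the change above, a simplified, self-contained sketch of the
docstring-templating pattern this patch adjusts: the ``skipna`` paragraph is pulled
out of the shared reduce docstring and injected only for methods that actually accept
``skipna``. The names below are illustrative; the real constant is ``_SKIPNA_DOCSTRING``
and the injection happens in ``inject_reduce_methods`` in ``xarray/core/ops.py``:

```python
_TEMPLATE = """Reduce this object by applying `{name}` along some dimension(s).

Parameters
----------{skipna_docs}
keep_attrs : bool, optional
    If True, the attributes (`attrs`) will be copied to the new object.
"""

_SKIPNA = """
skipna : bool, optional
    If True, skip missing values (as marked by NaN)."""


def build_reduce_docstring(name, include_skipna):
    # Only methods that actually accept ``skipna`` (e.g. mean) get the
    # paragraph; count/any/all do not, which is what this patch fixes.
    skipna_docs = _SKIPNA if include_skipna else ""
    return _TEMPLATE.format(name=name, skipna_docs=skipna_docs)


print(build_reduce_docstring("count", include_skipna=False))
print(build_reduce_docstring("mean", include_skipna=True))
```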
    ---
     doc/whats-new.rst  |  3 ++-
     xarray/core/ops.py | 16 ++++++++++------
     2 files changed, 12 insertions(+), 7 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d85fa0dee14..b3b9a8bf184 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -37,7 +37,8 @@ Bug fixes
     
     Documentation
     ~~~~~~~~~~~~~
    -
    +- removed skipna argument from :py:meth:`DataArray.count`, any, all. (:issue:`755`)
    +  By `Sander van Rijn `_
     
     Internal Changes
     ~~~~~~~~~~~~~~~~
    diff --git a/xarray/core/ops.py b/xarray/core/ops.py
    index d4aeea37aad..3675317977f 100644
    --- a/xarray/core/ops.py
    +++ b/xarray/core/ops.py
    @@ -90,12 +90,7 @@
     
     Parameters
     ----------
    -{extra_args}
    -skipna : bool, optional
    -    If True, skip missing values (as marked by NaN). By default, only
    -    skips missing values for float dtypes; other dtypes either do not
    -    have a sentinel missing value (int) or skipna=True has not been
    -    implemented (object, datetime64 or timedelta64).{min_count_docs}
    +{extra_args}{skip_na_docs}{min_count_docs}
     keep_attrs : bool, optional
         If True, the attributes (`attrs`) will be copied from the original
         object to the new one.  If False (default), the new object will be
    @@ -111,6 +106,13 @@
         indicated dimension(s) removed.
     """
     
    +_SKIPNA_DOCSTRING = """
    +skipna : bool, optional
    +    If True, skip missing values (as marked by NaN). By default, only
    +    skips missing values for float dtypes; other dtypes either do not
    +    have a sentinel missing value (int) or skipna=True has not been
    +    implemented (object, datetime64 or timedelta64)."""
    +
     _MINCOUNT_DOCSTRING = """
     min_count : int, default None
         The required number of valid values to perform the operation.
    @@ -260,6 +262,7 @@ def inject_reduce_methods(cls):
         for name, f, include_skipna in methods:
             numeric_only = getattr(f, "numeric_only", False)
             available_min_count = getattr(f, "available_min_count", False)
    +        skip_na_docs = _SKIPNA_DOCSTRING if include_skipna else ""
             min_count_docs = _MINCOUNT_DOCSTRING if available_min_count else ""
     
             func = cls._reduce_method(f, include_skipna, numeric_only)
    @@ -268,6 +271,7 @@ def inject_reduce_methods(cls):
                 name=name,
                 cls=cls.__name__,
                 extra_args=cls._reduce_extra_args_docstring.format(name=name),
    +            skip_na_docs=skip_na_docs,
                 min_count_docs=min_count_docs,
             )
             setattr(cls, name, func)
    
    From 98d2829be50318dbbcced6451627be3788db4504 Mon Sep 17 00:00:00 2001
    From: Sander 
    Date: Fri, 24 Jul 2020 21:20:44 +0200
    Subject: [PATCH 094/342] Fix DataArray.copy documentation: remove confusing
     mention of 'dataset' (Gh3606) (#4245)
    
    * Docs: improve `dataarray.copy` docstring + solves GH3606
    
    * updated whats-new
    
    * moved whats-new to correct location
    
    Co-authored-by: Deepak Cherian 
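
To illustrate the behaviour the corrected docstring describes, a small example
(assuming in-memory, numpy-backed data):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(3.0), dims="x")

shallow = da.copy(deep=False)  # new DataArray, but values are a view of da's values
deep = da.copy(deep=True)      # fully independent copy of the data

shallow[0] = 99.0
print(da[0].item())    # 99.0 -- the shallow copy shares its values with da
print(deep[0].item())  # 0.0  -- the deep copy is unaffected
```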
    ---
     doc/whats-new.rst        | 5 ++++-
     xarray/core/dataarray.py | 4 ++--
     2 files changed, 6 insertions(+), 3 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index b3b9a8bf184..2ad2a426532 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -37,7 +37,10 @@ Bug fixes
     
     Documentation
     ~~~~~~~~~~~~~
    -- removed skipna argument from :py:meth:`DataArray.count`, any, all. (:issue:`755`)
    +
    +- update the docstring of :py:meth:`DataArray.copy` to remove incorrect mention of 'dataset' (:issue:`3606`)
    +  By `Sander van Rijn `_.
    +- removed skipna argument from :py:meth:`DataArray.count`, :py:meth:`DataArray.any`, :py:meth:`DataArray.all`. (:issue:`755`)
       By `Sander van Rijn `_
     
     Internal Changes
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index dbc4877fa1d..f84f5971080 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -862,8 +862,8 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray":
             """Returns a copy of this array.
     
             If `deep=True`, a deep copy is made of the data array.
    -        Otherwise, a shallow copy is made, so each variable in the new
    -        array's dataset is also a variable in this array's dataset.
    +        Otherwise, a shallow copy is made, and the returned data array's
    +        values are a new view of this data array's values.
     
             Use `data` to create a new object with the same structure as
             original but entirely new data.
    
    From b1c7e315e8a18e86c5751a0aa9024d41a42ca5e8 Mon Sep 17 00:00:00 2001
    From: Jacob Tomlinson 
    Date: Fri, 24 Jul 2020 21:38:57 +0100
    Subject: [PATCH 095/342] Support cupy in as_shared_dtype (#4232)
    
    * Support cupy in as_shared_dtype
    
    * Lint
    
    * Update xarray/core/pycompat.py
    
    * Add type test
    
    * mypy ignore cupy
    
    Co-authored-by: dcherian 
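
For readers without a GPU handy, a rough, standalone sketch of the dispatch idea
behind the ``as_shared_dtype`` change (not the exact xarray code; ``cupy`` is optional
and the function name below is illustrative):

```python
import numpy as np

try:
    # cupy is optional; without it we simply never take the GPU branch.
    import cupy as cp
    cupy_array_type = (cp.ndarray,)
except ImportError:
    cp = None
    cupy_array_type = ()


def as_shared_dtype_sketch(values):
    """Cast inputs to a common dtype without moving GPU data to the host."""
    # Pick the array namespace: cupy if any input already lives on the GPU.
    xp = cp if any(isinstance(v, cupy_array_type) for v in values) else np
    arrays = [xp.asarray(v) for v in values]
    # xarray passes the arrays themselves to result_type so scalars promote
    # correctly; using the dtypes keeps this sketch backend-agnostic.
    dtype = np.result_type(*(a.dtype for a in arrays))
    return [a.astype(dtype, copy=False) for a in arrays]


print([a.dtype for a in as_shared_dtype_sketch([np.int32(1), np.arange(3.0)])])
```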
    ---
     setup.cfg                     |  2 ++
     xarray/core/duck_array_ops.py | 14 ++++++++++----
     xarray/core/pycompat.py       |  8 ++++++++
     xarray/core/variable.py       | 12 +++++++-----
     xarray/tests/test_cupy.py     | 10 ++++++++++
     5 files changed, 37 insertions(+), 9 deletions(-)
    
    diff --git a/setup.cfg b/setup.cfg
    index 42dc53bb882..ad0b12a3e32 100644
    --- a/setup.cfg
    +++ b/setup.cfg
    @@ -138,6 +138,8 @@ ignore_missing_imports = True
     ignore_missing_imports = True
     [mypy-cftime.*]
     ignore_missing_imports = True
    +[mypy-cupy.*]
    +ignore_missing_imports = True
     [mypy-dask.*]
     ignore_missing_imports = True
     [mypy-distributed.*]
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index df579d23544..e82978ef600 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -14,7 +14,7 @@
     
     from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils
     from .nputils import nanfirst, nanlast
    -from .pycompat import dask_array_type
    +from .pycompat import cupy_array_type, dask_array_type
     
     try:
         import dask.array as dask_array
    @@ -158,17 +158,23 @@ def trapz(y, x, axis):
     )
     
     
    -def asarray(data):
    +def asarray(data, xp=np):
         return (
             data
             if (isinstance(data, dask_array_type) or hasattr(data, "__array_function__"))
    -        else np.asarray(data)
    +        else xp.asarray(data)
         )
     
     
     def as_shared_dtype(scalars_or_arrays):
         """Cast a arrays to a shared dtype using xarray's type promotion rules."""
    -    arrays = [asarray(x) for x in scalars_or_arrays]
    +
    +    if any([isinstance(x, cupy_array_type) for x in scalars_or_arrays]):
    +        import cupy as cp
    +
    +        arrays = [asarray(x, xp=cp) for x in scalars_or_arrays]
    +    else:
    +        arrays = [asarray(x) for x in scalars_or_arrays]
         # Pass arrays directly instead of dtypes to result_type so scalars
         # get handled properly.
         # Note that result_type() safely gets the dtype from dask arrays without
    diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py
    index aaf52b9f295..dcb78d17cf8 100644
    --- a/xarray/core/pycompat.py
    +++ b/xarray/core/pycompat.py
    @@ -17,3 +17,11 @@
         sparse_array_type = (sparse.SparseArray,)
     except ImportError:  # pragma: no cover
         sparse_array_type = ()
    +
    +try:
    +    # solely for isinstance checks
    +    import cupy
    +
    +    cupy_array_type = (cupy.ndarray,)
    +except ImportError:  # pragma: no cover
    +    cupy_array_type = ()
    diff --git a/xarray/core/variable.py b/xarray/core/variable.py
    index c505c749557..f9a41b2cee9 100644
    --- a/xarray/core/variable.py
    +++ b/xarray/core/variable.py
    @@ -33,7 +33,7 @@
     )
     from .npcompat import IS_NEP18_ACTIVE
     from .options import _get_keep_attrs
    -from .pycompat import dask_array_type, integer_types
    +from .pycompat import cupy_array_type, dask_array_type, integer_types
     from .utils import (
         OrderedSet,
         _default,
    @@ -45,9 +45,8 @@
     )
     
     NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
    -    indexing.ExplicitlyIndexed,
    -    pd.Index,
    -) + dask_array_type
    +    (indexing.ExplicitlyIndexed, pd.Index,) + dask_array_type + cupy_array_type
    +)
     # https://github.com/python/mypy/issues/224
     BASIC_INDEXING_TYPES = integer_types + (slice,)  # type: ignore
     
    @@ -257,7 +256,10 @@ def _as_array_or_item(data):
     
         TODO: remove this (replace with np.asarray) once these issues are fixed
         """
    -    data = np.asarray(data)
    +    if isinstance(data, cupy_array_type):
    +        data = data.get()
    +    else:
    +        data = np.asarray(data)
         if data.ndim == 0:
             if data.dtype.kind == "M":
                 data = np.datetime64(data, "ns")
    diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py
    index 624e78d9271..0276b8ebc08 100644
    --- a/xarray/tests/test_cupy.py
    +++ b/xarray/tests/test_cupy.py
    @@ -48,3 +48,13 @@ def test_check_data_stays_on_gpu(toy_weather_data):
         """Perform some operations and check the data stays on the GPU."""
         freeze = (toy_weather_data["tmin"] <= 0).groupby("time.month").mean("time")
         assert isinstance(freeze.data, cp.core.core.ndarray)
    +
    +
    +def test_where():
    +    from xarray.core.duck_array_ops import where
    +
    +    data = cp.zeros(10)
    +
    +    output = where(data < 1, 1, data).all()
    +    assert output
    +    assert isinstance(output, cp.ndarray)
    
    From d2202f382b447845ea3ebc9f48a1263de2f35de4 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Sat, 25 Jul 2020 23:04:54 +0000
    Subject: [PATCH 096/342] Improve some error messages: apply_ufunc &
     set_options. (#4259)
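
A minimal sketch (assuming this patch is applied) of the kind of error message the new
``exclude_dims`` type check produces:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(4.0), dims="x")

try:
    xr.apply_ufunc(
        lambda a: a,
        da,
        input_core_dims=[["x"]],
        output_core_dims=[["x"]],
        exclude_dims=["x"],  # wrong on purpose: exclude_dims must be a set, e.g. {"x"}
    )
except TypeError as err:
    print(err)  # Expected exclude_dims to be a 'set'. Received 'list' instead.
```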
    
    ---
     xarray/core/computation.py | 28 +++++++++++++++++-----------
     xarray/core/groupby.py     |  3 ++-
     xarray/core/options.py     | 10 +++++++++-
     3 files changed, 28 insertions(+), 13 deletions(-)
    
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 94d4c6b1540..1f2a8a8e746 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -622,9 +622,8 @@ def func(*arrays):
             if data.ndim != len(dims):
                 raise ValueError(
                     "applied function returned data with unexpected "
    -                "number of dimensions: {} vs {}, for dimensions {}".format(
    -                    data.ndim, len(dims), dims
    -                )
    +                f"number of dimensions. Received {data.ndim} dimension(s) but "
    +                f"expected {len(dims)} dimensions with names: {dims!r}"
                 )
     
             var = Variable(dims, data, fastpath=True)
    @@ -984,9 +983,10 @@ def earth_mover_distance(first_samples,
             input_core_dims = ((),) * (len(args))
         elif len(input_core_dims) != len(args):
             raise ValueError(
    -            "input_core_dims must be None or a tuple with the length same to "
    -            "the number of arguments. Given input_core_dims: {}, "
    -            "number of args: {}.".format(input_core_dims, len(args))
    +            f"input_core_dims must be None or a tuple with the length same to "
    +            f"the number of arguments. "
    +            f"Given {len(input_core_dims)} input_core_dims: {input_core_dims}, "
    +            f" but number of args is {len(args)}."
             )
     
         if kwargs is None:
    @@ -994,11 +994,17 @@ def earth_mover_distance(first_samples,
     
         signature = _UFuncSignature(input_core_dims, output_core_dims)
     
    -    if exclude_dims and not exclude_dims <= signature.all_core_dims:
    -        raise ValueError(
    -            "each dimension in `exclude_dims` must also be a "
    -            "core dimension in the function signature"
    -        )
    +    if exclude_dims:
    +        if not isinstance(exclude_dims, set):
    +            raise TypeError(
    +                f"Expected exclude_dims to be a 'set'. Received '{type(exclude_dims).__name__}' instead."
    +            )
    +        if not exclude_dims <= signature.all_core_dims:
    +            raise ValueError(
    +                f"each dimension in `exclude_dims` must also be a "
    +                f"core dimension in the function signature. "
    +                f"Please make {(exclude_dims - signature.all_core_dims)} a core dimension"
    +            )
     
         if kwargs:
             func = functools.partial(func, **kwargs)
    diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
    index aa7aa1f5e86..5087390ecc0 100644
    --- a/xarray/core/groupby.py
    +++ b/xarray/core/groupby.py
    @@ -310,7 +310,8 @@ def __init__(
                 if not hashable(group):
                     raise TypeError(
                         "`group` must be an xarray.DataArray or the "
    -                    "name of an xarray variable or dimension"
    +                    "name of an xarray variable or dimension."
    +                    f"Received {group!r} instead."
                     )
                 group = obj[group]
                 if len(group) == 0:
    diff --git a/xarray/core/options.py b/xarray/core/options.py
    index 5d81ca40a6e..bb1b1c47840 100644
    --- a/xarray/core/options.py
    +++ b/xarray/core/options.py
    @@ -132,7 +132,15 @@ def __init__(self, **kwargs):
                         % (k, set(OPTIONS))
                     )
                 if k in _VALIDATORS and not _VALIDATORS[k](v):
    -                raise ValueError(f"option {k!r} given an invalid value: {v!r}")
    +                if k == ARITHMETIC_JOIN:
    +                    expected = f"Expected one of {_JOIN_OPTIONS!r}"
    +                elif k == DISPLAY_STYLE:
    +                    expected = f"Expected one of {_DISPLAY_OPTIONS!r}"
    +                else:
    +                    expected = ""
    +                raise ValueError(
    +                    f"option {k!r} given an invalid value: {v!r}. " + expected
    +                )
                 self.old[k] = OPTIONS[k]
             self._apply_update(kwargs)
     
    
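To see what the stricter ``exclude_dims`` handling above reports in practice, here is a small usage sketch (illustrative only; the array and the dimension name ``x`` are invented). Passing a list triggers the new ``TypeError``, and excluding a dimension that is not a core dimension triggers the extended ``ValueError``:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(4.0), dims="x")

# exclude_dims must be a set -> the new TypeError fires for a list
try:
    xr.apply_ufunc(
        np.add, da, da,
        input_core_dims=[["x"], ["x"]],
        output_core_dims=[["x"]],
        exclude_dims=["x"],
    )
except TypeError as err:
    print(err)

# every excluded dimension must also be a core dimension -> ValueError
try:
    xr.apply_ufunc(np.add, da, da, exclude_dims={"x"})
except ValueError as err:
    print(err)
```
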
    From 83987b78a90c24731755d5fe7dc8c38ef2182aab Mon Sep 17 00:00:00 2001
    From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    Date: Sat, 25 Jul 2020 19:23:49 -0400
    Subject: [PATCH 097/342] xfail failing upstream plotting tests (#4271)
    
    * xfail failing upstream plotting tests
    
    * _
    
    * _
    ---
     xarray/tests/test_plot.py | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index 788c26f3b39..83b2aeee9c6 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -2251,6 +2251,7 @@ def test_datetime_line_plot(self):
             self.darray.plot.line()
     
     
    +@pytest.mark.xfail(reason="Failing on upstream tests asof 2020-07-25")
     @requires_nc_time_axis
     @requires_cftime
     class TestCFDatetimePlot(PlotTestCase):
    
    From 50dcdacc98906f5f5721bb6bbe1b9cef2425dc1e Mon Sep 17 00:00:00 2001
    From: Spencer Clark 
    Date: Sun, 26 Jul 2020 15:04:54 -0400
    Subject: [PATCH 098/342] Un-xfail cftime plotting tests (#4272)
    
    ---
     xarray/tests/test_plot.py | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index 83b2aeee9c6..5a32e454222 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -2251,7 +2251,7 @@ def test_datetime_line_plot(self):
             self.darray.plot.line()
     
     
    -@pytest.mark.xfail(reason="Failing on upstream tests asof 2020-07-25")
    +@pytest.mark.filterwarnings("ignore:setting an array element with a sequence")
     @requires_nc_time_axis
     @requires_cftime
     class TestCFDatetimePlot(PlotTestCase):
    
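Patches 097 and 098 trade one class-level pytest marker for another: ``xfail`` tolerates failing tests and reports them as expected failures, while ``filterwarnings`` keeps the tests running and silences one specific warning. A generic illustration of the two markers (class and test names are invented):

```python
import warnings

import pytest


@pytest.mark.xfail(reason="known upstream regression")
class TestExpectedToFail:
    def test_broken(self):
        assert False  # reported as xfail rather than as a failure


@pytest.mark.filterwarnings("ignore:setting an array element with a sequence")
class TestWarningSilenced:
    def test_passes(self):
        warnings.warn("setting an array element with a sequence")
        assert True
```
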
    From 9d6b174eef5a7f179f104605a3cd48d6b372405a Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sun, 26 Jul 2020 21:13:05 +0200
    Subject: [PATCH 099/342] remove the compatibility code in
     duck_array_ops.allclose_or_equiv (#4270)
    
    ---
     xarray/core/duck_array_ops.py | 19 -------------------
     1 file changed, 19 deletions(-)
    
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index e82978ef600..3d19288228e 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -6,7 +6,6 @@
     import contextlib
     import inspect
     import warnings
    -from distutils.version import LooseVersion
     from functools import partial
     
     import numpy as np
    @@ -21,14 +20,6 @@
     except ImportError:
         dask_array = None  # type: ignore
     
    -# TODO: remove after we stop supporting dask < 2.9.1
    -try:
    -    import dask
    -
    -    dask_version = dask.__version__
    -except ImportError:
    -    dask_version = None
    -
     
     def _dask_or_eager_func(
         name,
    @@ -217,16 +208,6 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
     
         lazy_equiv = lazy_array_equiv(arr1, arr2)
         if lazy_equiv is None:
    -        # TODO: remove after we require dask >= 2.9.1
    -        sufficient_dask_version = (
    -            dask_version is not None and LooseVersion(dask_version) >= "2.9.1"
    -        )
    -        if not sufficient_dask_version and any(
    -            isinstance(arr, dask_array_type) for arr in [arr1, arr2]
    -        ):
    -            arr1 = np.array(arr1)
    -            arr2 = np.array(arr2)
    -
             return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
         else:
             return lazy_equiv
    
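With the compatibility shim above removed, ``allclose_or_equiv`` defers to ``lazy_array_equiv`` for lazy arrays and otherwise reduces an element-wise ``isclose`` over the inputs. A NumPy-only sketch of that remaining comparison (no dask; shape handling simplified):

```python
import numpy as np


def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
    arr1 = np.asarray(arr1)
    arr2 = np.asarray(arr2)
    if arr1.shape != arr2.shape:
        return False
    # equal_nan=True treats NaNs in matching positions as equal
    return bool(np.isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())


print(allclose_or_equiv([1.0, np.nan], [1.0 + 1e-9, np.nan]))  # True
print(allclose_or_equiv([1.0, 2.0], [1.0, 2.5]))               # False
```
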
    From 8151390c3a7c324b6a208a9a3d994d56b4382d8e Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sun, 26 Jul 2020 21:16:19 +0200
    Subject: [PATCH 100/342] update the list item numbers in the release guide
     (#4264)
    
    ---
     HOW_TO_RELEASE.md | 38 +++++++++++++++++++-------------------
     1 file changed, 19 insertions(+), 19 deletions(-)
    
    diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md
    index 24995a389c4..ec0cca59545 100644
    --- a/HOW_TO_RELEASE.md
    +++ b/HOW_TO_RELEASE.md
    @@ -1,4 +1,4 @@
    -# How to issue an xarray release in 17 easy steps
    +# How to issue an xarray release in 20 easy steps
     
     Time required: about an hour.
     
    @@ -23,9 +23,9 @@ upstream        https://github.com/pydata/xarray (push)
         git log v{0.X.Y-1}.. --format=%aN | sort -u | perl -pe 's/\n/$1, /'
         ```
         Add these into `whats-new.rst` somewhere :)
    - 2. Write a release summary: ~50 words describing the high level features. This
    + 3. Write a release summary: ~50 words describing the high level features. This
         will be used in the release emails, tweets, GitHub release notes, etc. 
    - 3. Look over whats-new.rst and the docs. Make sure "What's New" is complete
    + 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete
         (check the date!) and add the release summary at the top.
         Things to watch out for:
         - Important new features should be highlighted towards the top.
    @@ -34,46 +34,46 @@ upstream        https://github.com/pydata/xarray (push)
           due to a bad merge. Check for these before a release by using git diff,
           e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous
           release.
    - 4. If possible, open a PR with the release summary and whatsnew changes.
    - 4. After merging, again ensure your master branch is synced to upstream:
    + 5. If possible, open a PR with the release summary and whatsnew changes.
    + 6. After merging, again ensure your master branch is synced to upstream:
          ```sh
          git pull upstream master
          ```
    - 4. If you have any doubts, run the full test suite one final time!
    + 7. If you have any doubts, run the full test suite one final time!
           ```sh
           pytest
           ```
    - 5. Check that the ReadTheDocs build is passing.
    - 6. On the master branch, commit the release in git:
    + 8. Check that the ReadTheDocs build is passing.
    + 9. On the master branch, commit the release in git:
           ```sh
           git commit -am 'Release v{0.X.Y}'
           ```
    - 7. Tag the release:
    +10. Tag the release:
           ```sh
           git tag -a v{0.X.Y} -m 'v{0.X.Y}'
           ```
    - 8. Build source and binary wheels for PyPI:
    +11. Build source and binary wheels for PyPI:
           ```sh
           git clean -xdf  # this deletes all uncommitted changes!
           python setup.py bdist_wheel sdist
           ```
    - 9. Use twine to check the package build:
    +12. Use twine to check the package build:
           ```sh
           twine check dist/xarray-{0.X.Y}*
           ```
    -10. Use twine to register and upload the release on PyPI. Be careful, you can't
    +13. Use twine to register and upload the release on PyPI. Be careful, you can't
         take this back!
           ```sh
           twine upload dist/xarray-{0.X.Y}*
           ```
         You will need to be listed as a package owner at
         https://pypi.python.org/pypi/xarray for this to work.
    -11. Push your changes to master:
    +14. Push your changes to master:
           ```sh
           git push upstream master
           git push upstream --tags
           ```
    -12. Update the stable branch (used by ReadTheDocs) and switch back to master:
    +15. Update the stable branch (used by ReadTheDocs) and switch back to master:
          ```sh
           git checkout stable
           git rebase master
    @@ -83,7 +83,7 @@ upstream        https://github.com/pydata/xarray (push)
         It's OK to force push to 'stable' if necessary. (We also update the stable 
         branch with `git cherry-pick` for documentation only fixes that apply the 
         current released version.)
    -13. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst:
    +16. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst:
          ```
          .. _whats-new.{0.X.Y+1}:
     
    @@ -109,19 +109,19 @@ upstream        https://github.com/pydata/xarray (push)
          Internal Changes
          ~~~~~~~~~~~~~~~~
          ```
    -14. Commit your changes and push to master again:
    +17. Commit your changes and push to master again:
           ```sh
           git commit -am 'New whatsnew section'
           git push upstream master
           ```
         You're done pushing to master!
    -15. Issue the release on GitHub. Click on "Draft a new release" at
    +18. Issue the release on GitHub. Click on "Draft a new release" at
         https://github.com/pydata/xarray/releases. Type in the version number
         and paste the release summary in the notes.
    -16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/
    +19. Update the docs. Login to https://readthedocs.org/projects/xray/versions/
         and switch your new release tag (at the bottom) from "Inactive" to "Active".
         It should now build automatically.
    -17. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I 
    +20. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I 
         usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader
         list (no more than once every 3-6 months):
           - pydata@googlegroups.com
    
    From a198218ddabe557adbb04311b3234ec8d20419e7 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Mon, 27 Jul 2020 16:04:48 +0200
    Subject: [PATCH 101/342] enable fail_on_warning for RTD (#4269)
    
    ---
     readthedocs.yml | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/readthedocs.yml b/readthedocs.yml
    index 88abb57ae43..072a4b5110c 100644
    --- a/readthedocs.yml
    +++ b/readthedocs.yml
    @@ -7,6 +7,6 @@ conda:
         environment: ci/requirements/doc.yml
     
     sphinx:
    -  fail_on_warning: false
    +  fail_on_warning: true
     
     formats: []
    
    From 9058114f70d07ef04654d1d60718442d0555b84b Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Fri, 31 Jul 2020 23:30:42 +0200
    Subject: [PATCH 102/342] warn about the removal of the ufuncs (#4268)
    
    ---
     doc/api.rst | 4 +++-
     1 file changed, 3 insertions(+), 1 deletion(-)
    
    diff --git a/doc/api.rst b/doc/api.rst
    index 72a6dd4d97a..5e8a2be0ed4 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -572,7 +572,9 @@ Universal functions
        With recent versions of numpy, dask and xarray, NumPy ufuncs are now
        supported directly on all xarray and dask objects. This obviates the need
        for the ``xarray.ufuncs`` module, which should not be used for new code
    -   unless compatibility with versions of NumPy prior to v1.13 is required.
    +   unless compatibility with versions of NumPy prior to v1.13 is
    +   required. They will be removed once support for NumPy prior to
    +   v1.17 is dropped.
     
     These functions are copied from NumPy, but extended to work on NumPy arrays,
     dask arrays and all xarray objects. You can find them in the ``xarray.ufuncs``
    
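The note above recommends calling NumPy ufuncs directly on xarray objects instead of importing them from ``xarray.ufuncs``. A minimal illustration of what that looks like:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.linspace(0.0, np.pi, 5), dims="x")

result = np.sin(da)           # dispatches through xarray; dims and coords are kept
print(type(result).__name__)  # DataArray
```
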
    From f99c6cca2df959df3db3c57592db97287fd28f15 Mon Sep 17 00:00:00 2001
    From: crusaderky 
    Date: Sun, 2 Aug 2020 10:05:14 +0300
    Subject: [PATCH 103/342] Lazily load resource files (#4297)
    
    * Lazily load resource files
    
    * isort
    ---
     doc/whats-new.rst              |  2 ++
     xarray/core/formatting_html.py | 18 ++++++++++++------
     2 files changed, 14 insertions(+), 6 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 2ad2a426532..4b8f980cade 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -45,6 +45,8 @@ Documentation
     
     Internal Changes
     ~~~~~~~~~~~~~~~~
    +- Only load resource files when running inside a Jupyter Notebook
    +  (:issue:`4294`) By `Guido Imperiale `_
     
     
     .. _whats-new.0.16.0:
    diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
    index 400ef61502e..5521b33e2e4 100644
    --- a/xarray/core/formatting_html.py
    +++ b/xarray/core/formatting_html.py
    @@ -1,18 +1,23 @@
     import uuid
     from collections import OrderedDict
    -from functools import partial
    +from functools import lru_cache, partial
     from html import escape
     
     import pkg_resources
     
     from .formatting import inline_variable_array_repr, short_data_repr
     
    -CSS_FILE_PATH = "/".join(("static", "css", "style.css"))
    -CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8")
    +STATIC_FILES = ("static/html/icons-svg-inline.html", "static/css/style.css")
     
     
    -ICONS_SVG_PATH = "/".join(("static", "html", "icons-svg-inline.html"))
    -ICONS_SVG = pkg_resources.resource_string("xarray", ICONS_SVG_PATH).decode("utf8")
    +@lru_cache(None)
    +def _load_static_files():
    +    """Lazily load the resource files into memory the first time they are needed
    +    """
    +    return [
    +        pkg_resources.resource_string("xarray", fname).decode("utf8")
    +        for fname in STATIC_FILES
    +    ]
     
     
     def short_data_repr_html(array):
    @@ -233,9 +238,10 @@ def _obj_repr(obj, header_components, sections):
          header = f"<div class='xr-header'>{''.join(h for h in header_components)}</div>"
          sections = "".join(f"<li class='xr-section-item'>{s}</li>" for s in sections)
     
     +    icons_svg, css_style = _load_static_files()
          return (
              "<div>"
     -        f"{ICONS_SVG}<style>{CSS_STYLE}</style>"
     +        f"{icons_svg}<style>{css_style}</style>"
              f"<pre class='xr-text-repr-fallback'>{escape(repr(obj))}</pre>"
              "<ul class='xr-sections'>"
              f"{sections}"