Skip to content

Commit

Permalink
DEPR: Disallow missing nested label when indexing MultiIndex level (#…
Browse files Browse the repository at this point in the history
…49628)

DEPR: Disallow missing nested label when indexing MultiIndex level
  • Loading branch information
mroeschke committed Nov 11, 2022
1 parent f81f687 commit 16a4a5f
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 51 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ Removal of prior version deprecations/changes
- Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
- Enforced disallowing missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex`. This now raises a ``KeyError`` (:issue:`42351`)
- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
- Enforced disallowing positional indexing with a ``float`` key even if that key is a round number, manually cast to integer instead (:issue:`34193`)
- Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``, use ``.loc`` instead for automatic alignment (:issue:`39022`)
Expand Down
38 changes: 11 additions & 27 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3284,34 +3284,18 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
if not is_hashable(x):
# e.g. slice
raise err
try:
item_indexer = self._get_level_indexer(
x, level=i, indexer=indexer
)
except KeyError:
# ignore not founds; see discussion in GH#39424
warnings.warn(
"The behavior of indexing on a MultiIndex with a "
"nested sequence of labels is deprecated and will "
"change in a future version. "
"`series.loc[label, sequence]` will raise if any "
"members of 'sequence' or not present in "
"the index's second level. To retain the old "
"behavior, use `series.index.isin(sequence, level=1)`",
# TODO: how to opt in to the future behavior?
# TODO: how to handle IntervalIndex level?
# (no test cases)
FutureWarning,
stacklevel=find_stack_level(),
)
continue
# GH 39424: Ignore not founds
# GH 42351: No longer ignore not founds & enforced in 2.0
# TODO: how to handle IntervalIndex level? (no test cases)
item_indexer = self._get_level_indexer(
x, level=i, indexer=indexer
)
if lvl_indexer is None:
lvl_indexer = _to_bool_indexer(item_indexer)
elif isinstance(item_indexer, slice):
lvl_indexer[item_indexer] = True # type: ignore[index]
else:
if lvl_indexer is None:
lvl_indexer = _to_bool_indexer(item_indexer)
elif isinstance(item_indexer, slice):
lvl_indexer[item_indexer] = True # type: ignore[index]
else:
lvl_indexer |= item_indexer
lvl_indexer |= item_indexer

if lvl_indexer is None:
# no matches we are done
Expand Down
13 changes: 5 additions & 8 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,15 +443,12 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
if expected.size == 0 and indexer != []:
with pytest.raises(KeyError, match=str(indexer)):
ser.loc[indexer]
elif indexer == (slice(None), ["foo", "bah"]):
# "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
with pytest.raises(KeyError, match="'bah'"):
ser.loc[indexer]
else:
warn = None
msg = "MultiIndex with a nested sequence"
if indexer == (slice(None), ["foo", "bah"]):
# "bah" is not in idx.levels[1], so is ignored, will raise KeyError
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
result = ser.loc[indexer]
result = ser.loc[indexer]
tm.assert_series_equal(result, expected)


Expand Down
30 changes: 14 additions & 16 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import copy
import re
from textwrap import dedent
Expand Down Expand Up @@ -701,26 +702,26 @@ def test_applymap_subset(self, slice_, df):
def test_applymap_subset_multiindex(self, slice_):
# GH 19861
# edited for GH 33562
warn = None
msg = "indexing on a MultiIndex with a nested sequence of labels"
if (
isinstance(slice_[-1], tuple)
and isinstance(slice_[-1][-1], list)
and "C" in slice_[-1][-1]
):
warn = FutureWarning
ctx = pytest.raises(KeyError, match="C") # noqa: PDF010
elif (
isinstance(slice_[0], tuple)
and isinstance(slice_[0][1], list)
and 3 in slice_[0][1]
):
warn = FutureWarning
ctx = pytest.raises(KeyError, match="3") # noqa: PDF010
else:
ctx = contextlib.nullcontext()

idx = MultiIndex.from_product([["a", "b"], [1, 2]])
col = MultiIndex.from_product([["x", "y"], ["A", "B"]])
df = DataFrame(np.random.rand(4, 4), columns=col, index=idx)

with tm.assert_produces_warning(warn, match=msg):
with ctx:
df.style.applymap(lambda x: "color: red;", subset=slice_).to_html()

def test_applymap_subset_multiindex_code(self):
Expand Down Expand Up @@ -1390,7 +1391,7 @@ def test_non_reducing_slice_on_multiindex(self):
IndexSlice[:, IndexSlice["a", :, "e"]],
IndexSlice[:, IndexSlice[:, "c", "e"]],
IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # allow missing
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # don't allow missing
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice
# check rows
IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list
Expand All @@ -1399,7 +1400,7 @@ def test_non_reducing_slice_on_multiindex(self):
IndexSlice[IndexSlice["U", :, "Y"], :],
IndexSlice[IndexSlice[:, "W", "Y"], :],
IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # don't allow missing
IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice
# check simultaneous
IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]],
Expand All @@ -1411,21 +1412,18 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_):
idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]])
df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs)

msg = "indexing on a MultiIndex with a nested sequence of labels"
warn = None
for lvl in [0, 1]:
key = slice_[lvl]
if isinstance(key, tuple):
for subkey in key:
if isinstance(subkey, list) and "-" in subkey:
# not present in the index level, ignored, will raise in future
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
expected = df.loc[slice_]
# not present in the index level, raises KeyError since 2.0
with pytest.raises(KeyError, match="-"):
df.loc[slice_]
return

with tm.assert_produces_warning(warn, match=msg):
result = df.loc[non_reducing_slice(slice_)]
expected = df.loc[slice_]
result = df.loc[non_reducing_slice(slice_)]
tm.assert_frame_equal(result, expected)


Expand Down

0 comments on commit 16a4a5f

Please sign in to comment.