Skip to content

Commit

Permalink
DEPR: Index.__and__, __or__, __xor__ behaving as set ops (#37374)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 2, 2020
1 parent 462b21d commit 821f90c
Show file tree
Hide file tree
Showing 16 changed files with 79 additions and 32 deletions.
12 changes: 4 additions & 8 deletions doc/source/user_guide/indexing.rst
Expand Up @@ -1594,19 +1594,16 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
Set operations on Index objects
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The two main operations are ``union (|)`` and ``intersection (&)``.
These can be directly called as instance methods or used via overloaded
operators. Difference is provided via the ``.difference()`` method.
The two main operations are ``union`` and ``intersection``.
Difference is provided via the ``.difference()`` method.

.. ipython:: python
a = pd.Index(['c', 'b', 'a'])
b = pd.Index(['c', 'e', 'd'])
a | b
a & b
a.difference(b)
Also available is the ``symmetric_difference (^)`` operation, which returns elements
Also available is the ``symmetric_difference`` operation, which returns elements
that appear in either ``idx1`` or ``idx2``, but not in both. This is
equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``,
with duplicates dropped.
Expand All @@ -1616,7 +1613,6 @@ with duplicates dropped.
idx1 = pd.Index([1, 2, 3, 4])
idx2 = pd.Index([2, 3, 4, 5])
idx1.symmetric_difference(idx2)
idx1 ^ idx2
.. note::

Expand All @@ -1631,7 +1627,7 @@ integer values are converted to float
idx1 = pd.Index([0, 1, 2])
idx2 = pd.Index([0.5, 1.5])
idx1 | idx2
idx1.union(idx2)
.. _indexing.missing:

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/missing_data.rst
Expand Up @@ -466,7 +466,7 @@ at the new values.
ser = pd.Series(np.sort(np.random.uniform(size=100)))
# interpolate at new_index
new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]))
interp_s = ser.reindex(new_index).interpolate(method="pchip")
interp_s[49:51]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Expand Up @@ -338,6 +338,7 @@ Deprecations
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
- :class:`Index` operators ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)

.. ---------------------------------------------------------------------------
Expand Down
21 changes: 21 additions & 0 deletions pandas/core/indexes/base.py
Expand Up @@ -2504,14 +2504,35 @@ def __iadd__(self, other):

# Operator form ``self & other``: emits a FutureWarning announcing that the
# set-operation meaning of ``&`` is deprecated, then returns
# ``self.intersection(other)``.
@final
def __and__(self, other):
warnings.warn(
"Index.__and__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__and__. Use index.intersection(other) instead",
FutureWarning,
# stacklevel=2 attributes the warning to the frame that invoked __and__
# (i.e. the code that wrote ``a & b``) rather than to this method.
stacklevel=2,
)
# Delegate to the named set method that the warning message recommends.
return self.intersection(other)

# Operator form ``self | other``: emits a FutureWarning announcing that the
# set-operation meaning of ``|`` is deprecated, then returns
# ``self.union(other)``.
@final
def __or__(self, other):
warnings.warn(
"Index.__or__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__or__. Use index.union(other) instead",
FutureWarning,
# stacklevel=2 attributes the warning to the frame that invoked __or__
# (i.e. the code that wrote ``a | b``) rather than to this method.
stacklevel=2,
)
# Delegate to the named set method that the warning message recommends.
return self.union(other)

# Operator form ``self ^ other``: emits a FutureWarning announcing that the
# set-operation meaning of ``^`` is deprecated, then returns
# ``self.symmetric_difference(other)``.
@final
def __xor__(self, other):
warnings.warn(
"Index.__xor__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__xor__. Use index.symmetric_difference(other) instead",
FutureWarning,
# stacklevel=2 attributes the warning to the frame that invoked __xor__
# (i.e. the code that wrote ``a ^ b``) rather than to this method.
stacklevel=2,
)
# Delegate to the named set method that the warning message recommends.
return self.symmetric_difference(other)

@final
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/multi.py
Expand Up @@ -3126,12 +3126,12 @@ def _convert_to_indexer(r) -> Int64Index:
r = r.nonzero()[0]
return Int64Index(r)

def _update_indexer(idxr, indexer=indexer):
def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index:
if indexer is None:
indexer = Index(np.arange(n))
if idxr is None:
return indexer
return indexer & idxr
return indexer.intersection(idxr)

for i, k in enumerate(seq):

Expand All @@ -3149,7 +3149,9 @@ def _update_indexer(idxr, indexer=indexer):
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
indexers = idxrs if indexers is None else indexers | idxrs
indexers = (idxrs if indexers is None else indexers).union(
idxrs
)
except KeyError:

# ignore keys that are not found
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Expand Up @@ -725,7 +725,7 @@ def __array_ufunc__(
# it to handle *args.
index = alignable[0].index
for s in alignable[1:]:
index |= s.index
index = index.union(s.index)
inputs = tuple(
x.reindex(index) if issubclass(t, Series) else x
for x, t in zip(inputs, types)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/excel.py
Expand Up @@ -475,10 +475,10 @@ def __init__(
if cols is not None:

# all missing, raise
if not len(Index(cols) & df.columns):
if not len(Index(cols).intersection(df.columns)):
raise KeyError("passes columns are not ALL present dataframe")

if len(Index(cols) & df.columns) != len(cols):
if len(Index(cols).intersection(df.columns)) != len(cols):
# Deprecated in GH#17295, enforced in 1.0.0
raise KeyError("Not all names specified in 'columns' are found")

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/json/_json.py
Expand Up @@ -268,7 +268,7 @@ def __init__(
if (
(obj.ndim == 1)
and (obj.name in set(obj.index.names))
or len(obj.columns & obj.index.names)
or len(obj.columns.intersection(obj.index.names))
):
msg = "Overlapping names between the index and columns"
raise ValueError(msg)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimes/test_setops.py
Expand Up @@ -300,7 +300,8 @@ def test_intersection_bug_1708(self):
index_1 = date_range("1/1/2012", periods=4, freq="12H")
index_2 = index_1 + DateOffset(hours=1)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
assert len(result) == 0

@pytest.mark.parametrize("tz", tz)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexes/multi/test_setops.py
Expand Up @@ -105,11 +105,13 @@ def test_symmetric_difference(idx, sort):
def test_multiindex_symmetric_difference():
# GH 13490
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
result = idx ^ idx
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx
assert result.names == idx.names

idx2 = idx.copy().rename(["A", "B"])
result = idx ^ idx2
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx2
assert result.names == [None, None]


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/test_base.py
Expand Up @@ -1008,7 +1008,8 @@ def test_symmetric_difference(self, sort):
tm.assert_index_equal(result, expected)

# __xor__ syntax
expected = index1 ^ index2
with tm.assert_produces_warning(FutureWarning):
expected = index1 ^ index2
assert tm.equalContents(result, expected)
assert result.name is None

Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/indexes/test_setops.py
Expand Up @@ -93,5 +93,18 @@ def test_union_dtypes(left, right, expected):
right = pandas_dtype(right)
a = pd.Index([], dtype=left)
b = pd.Index([], dtype=right)
result = (a | b).dtype
result = a.union(b).dtype
assert result == expected


# Checks that the augmented-assignment forms ``|=``, ``&=`` and ``^=`` applied
# to an index each produce a FutureWarning.
# NOTE(review): ``index`` is presumably a pytest fixture supplying Index
# objects -- its definition is not visible here; confirm in conftest.
def test_dunder_inplace_setops_deprecated(index):
# GH#37374 these will become logical ops, not setops

# NOTE(review): presumably each in-place operator falls back to the matching
# binary dunder (e.g. ``|=`` -> ``__or__``), which is what warns -- confirm
# that no in-place dunder is defined for the class under test.
with tm.assert_produces_warning(FutureWarning):
index |= index

with tm.assert_produces_warning(FutureWarning):
index &= index

with tm.assert_produces_warning(FutureWarning):
index ^= index
6 changes: 4 additions & 2 deletions pandas/tests/indexes/timedeltas/test_setops.py
Expand Up @@ -97,13 +97,15 @@ def test_intersection_bug_1708(self):
index_1 = timedelta_range("1 day", periods=4, freq="h")
index_2 = index_1 + pd.offsets.Hour(5)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
assert len(result) == 0

index_1 = timedelta_range("1 day", periods=4, freq="h")
index_2 = index_1 + pd.offsets.Hour(1)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
expected = timedelta_range("1 day 01:00:00", periods=3, freq="h")
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/resample/test_datetime_index.py
Expand Up @@ -1112,9 +1112,9 @@ def test_resample_anchored_multiday():
#
# See: https://github.com/pandas-dev/pandas/issues/8683

index = pd.date_range(
"2014-10-14 23:06:23.206", periods=3, freq="400L"
) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
index1 = pd.date_range("2014-10-14 23:06:23.206", periods=3, freq="400L")
index2 = pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
index = index1.union(index2)

s = Series(np.random.randn(5), index=index)

Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/series/test_logical_ops.py
Expand Up @@ -269,11 +269,13 @@ def test_reversed_xor_with_index_returns_index(self):
idx2 = Index([1, 0, 1, 0])

expected = Index.symmetric_difference(idx1, ser)
result = idx1 ^ ser
with tm.assert_produces_warning(FutureWarning):
result = idx1 ^ ser
tm.assert_index_equal(result, expected)

expected = Index.symmetric_difference(idx2, ser)
result = idx2 ^ ser
with tm.assert_produces_warning(FutureWarning):
result = idx2 ^ ser
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -304,11 +306,13 @@ def test_reversed_logical_op_with_index_returns_series(self, op):
idx2 = Index([1, 0, 1, 0])

expected = Series(op(idx1.values, ser.values))
result = op(ser, idx1)
with tm.assert_produces_warning(FutureWarning):
result = op(ser, idx1)
tm.assert_series_equal(result, expected)

expected = Series(op(idx2.values, ser.values))
result = op(ser, idx2)
with tm.assert_produces_warning(FutureWarning):
result = op(ser, idx2)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
Expand All @@ -324,7 +328,9 @@ def test_reverse_ops_with_index(self, op, expected):
# multi-set Index ops are buggy, so let's avoid duplicates...
ser = Series([True, False])
idx = Index([False, True])
result = op(ser, idx)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# behaving as set ops is deprecated, will become logical ops
result = op(ser, idx)
tm.assert_index_equal(result, expected)

def test_logical_ops_label_based(self):
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/test_strings.py
Expand Up @@ -645,7 +645,9 @@ def test_str_cat_align_mixed_inputs(self, join):
u = np.array(["A", "B", "C", "D"])
expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
# joint index of rhs [t, u]; u will be forced to have the index of s
rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
rhs_idx = (
t.index.intersection(s.index) if join == "inner" else t.index.union(s.index)
)

expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
result = s.str.cat([t, u], join=join, na_rep="-")
Expand Down

0 comments on commit 821f90c

Please sign in to comment.