Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Index.__and__, __or__, __xor__ behaving as set ops #37374

Merged
merged 10 commits into from
Nov 2, 2020
12 changes: 4 additions & 8 deletions doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1594,19 +1594,16 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
Set operations on Index objects
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The two main operations are ``union (|)`` and ``intersection (&)``.
These can be directly called as instance methods or used via overloaded
operators. Difference is provided via the ``.difference()`` method.
The two main operations are ``union`` and ``intersection``.
Difference is provided via the ``.difference()`` method.

.. ipython:: python

a = pd.Index(['c', 'b', 'a'])
b = pd.Index(['c', 'e', 'd'])
a | b
a & b
a.difference(b)

Also available is the ``symmetric_difference (^)`` operation, which returns elements
Also available is the ``symmetric_difference`` operation, which returns elements
that appear in either ``idx1`` or ``idx2``, but not in both. This is
equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``,
with duplicates dropped.
Expand All @@ -1616,7 +1613,6 @@ with duplicates dropped.
idx1 = pd.Index([1, 2, 3, 4])
idx2 = pd.Index([2, 3, 4, 5])
idx1.symmetric_difference(idx2)
idx1 ^ idx2

.. note::

Expand All @@ -1631,7 +1627,7 @@ integer values are converted to float

idx1 = pd.Index([0, 1, 2])
idx2 = pd.Index([0.5, 1.5])
idx1 | idx2
idx1.union(idx2)

.. _indexing.missing:

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ at the new values.
ser = pd.Series(np.sort(np.random.uniform(size=100)))

# interpolate at new_index
new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]))
interp_s = ser.reindex(new_index).interpolate(method="pchip")
interp_s[49:51]

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ Deprecations
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)

.. ---------------------------------------------------------------------------

Expand Down
21 changes: 21 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,12 +2468,33 @@ def __iadd__(self, other):
return self + other

def __and__(self, other):
warnings.warn(
"Index.__and__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__and__. Use index.intersection(other) instead",
FutureWarning,
stacklevel=2,
)
return self.intersection(other)

def __or__(self, other):
warnings.warn(
"Index.__or__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__or__. Use index.union(other) instead",
FutureWarning,
stacklevel=2,
)
return self.union(other)

def __xor__(self, other):
warnings.warn(
"Index.__xor__ operating as a set operation is deprecated, "
"in the future this will be a logical operation matching "
"Series.__xor__. Use index.symmetric_difference(other) instead",
FutureWarning,
stacklevel=2,
)
return self.symmetric_difference(other)

def __nonzero__(self):
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3136,12 +3136,12 @@ def _convert_to_indexer(r) -> Int64Index:
r = r.nonzero()[0]
return Int64Index(r)

def _update_indexer(idxr, indexer=indexer):
def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index:
if indexer is None:
indexer = Index(np.arange(n))
if idxr is None:
return indexer
return indexer & idxr
return indexer.intersection(idxr)

for i, k in enumerate(seq):

Expand All @@ -3159,7 +3159,9 @@ def _update_indexer(idxr, indexer=indexer):
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
indexers = idxrs if indexers is None else indexers | idxrs
indexers = (idxrs if indexers is None else indexers).union(
idxrs
)
except KeyError:

# ignore not founds
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ def __array_ufunc__(
# it to handle *args.
index = alignable[0].index
for s in alignable[1:]:
index |= s.index
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are certainly using this (or at least were), so we need to make sure these in-place ops are deprecated as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some tests for the in-place ops to make sure they warn as well?

index = index.union(s.index)
inputs = tuple(
x.reindex(index) if issubclass(t, Series) else x
for x, t in zip(inputs, types)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,10 +475,10 @@ def __init__(
if cols is not None:

# all missing, raise
if not len(Index(cols) & df.columns):
if not len(Index(cols).intersection(df.columns)):
raise KeyError("passes columns are not ALL present dataframe")

if len(Index(cols) & df.columns) != len(cols):
if len(Index(cols).intersection(df.columns)) != len(cols):
# Deprecated in GH#17295, enforced in 1.0.0
raise KeyError("Not all names specified in 'columns' are found")

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def __init__(
if (
(obj.ndim == 1)
and (obj.name in set(obj.index.names))
or len(obj.columns & obj.index.names)
or len(obj.columns.intersection(obj.index.names))
):
msg = "Overlapping names between the index and columns"
raise ValueError(msg)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@ def test_intersection_bug_1708(self):
index_1 = date_range("1/1/2012", periods=4, freq="12H")
index_2 = index_1 + DateOffset(hours=1)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
assert len(result) == 0

@pytest.mark.parametrize("tz", tz)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexes/multi/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@ def test_symmetric_difference(idx, sort):
def test_multiindex_symmetric_difference():
# GH 13490
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
result = idx ^ idx
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx
assert result.names == idx.names

idx2 = idx.copy().rename(["A", "B"])
result = idx ^ idx2
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx2
assert result.names == [None, None]


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,8 @@ def test_symmetric_difference(self, sort):
tm.assert_index_equal(result, expected)

# __xor__ syntax
expected = index1 ^ index2
with tm.assert_produces_warning(FutureWarning):
expected = index1 ^ index2
assert tm.equalContents(result, expected)
assert result.name is None

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,5 @@ def test_union_dtypes(left, right, expected):
right = pandas_dtype(right)
a = pd.Index([], dtype=left)
b = pd.Index([], dtype=right)
result = (a | b).dtype
result = a.union(b).dtype
assert result == expected
6 changes: 4 additions & 2 deletions pandas/tests/indexes/timedeltas/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def test_intersection_bug_1708(self):
index_1 = timedelta_range("1 day", periods=4, freq="h")
index_2 = index_1 + pd.offsets.Hour(5)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
assert len(result) == 0

index_1 = timedelta_range("1 day", periods=4, freq="h")
index_2 = index_1 + pd.offsets.Hour(1)

result = index_1 & index_2
with tm.assert_produces_warning(FutureWarning):
result = index_1 & index_2
expected = timedelta_range("1 day 01:00:00", periods=3, freq="h")
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,9 +1112,9 @@ def test_resample_anchored_multiday():
#
# See: https://github.com/pandas-dev/pandas/issues/8683

index = pd.date_range(
"2014-10-14 23:06:23.206", periods=3, freq="400L"
) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
index1 = pd.date_range("2014-10-14 23:06:23.206", periods=3, freq="400L")
index2 = pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
index = index1.union(index2)

s = Series(np.random.randn(5), index=index)

Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,13 @@ def test_reversed_xor_with_index_returns_index(self):
idx2 = Index([1, 0, 1, 0])

expected = Index.symmetric_difference(idx1, ser)
result = idx1 ^ ser
with tm.assert_produces_warning(FutureWarning):
result = idx1 ^ ser
tm.assert_index_equal(result, expected)

expected = Index.symmetric_difference(idx2, ser)
result = idx2 ^ ser
with tm.assert_produces_warning(FutureWarning):
result = idx2 ^ ser
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -304,11 +306,13 @@ def test_reversed_logical_op_with_index_returns_series(self, op):
idx2 = Index([1, 0, 1, 0])

expected = Series(op(idx1.values, ser.values))
result = op(ser, idx1)
with tm.assert_produces_warning(FutureWarning):
result = op(ser, idx1)
tm.assert_series_equal(result, expected)

expected = Series(op(idx2.values, ser.values))
result = op(ser, idx2)
with tm.assert_produces_warning(FutureWarning):
result = op(ser, idx2)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
Expand All @@ -324,7 +328,9 @@ def test_reverse_ops_with_index(self, op, expected):
# multi-set Index ops are buggy, so let's avoid duplicates...
ser = Series([True, False])
idx = Index([False, True])
result = op(ser, idx)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# behaving as set ops is deprecated, will become logical ops
result = op(ser, idx)
tm.assert_index_equal(result, expected)

def test_logical_ops_label_based(self):
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,9 @@ def test_str_cat_align_mixed_inputs(self, join):
u = np.array(["A", "B", "C", "D"])
expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
# joint index of rhs [t, u]; u will be forced to have the index of s
rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
rhs_idx = (
t.index.intersection(s.index) if join == "inner" else t.index.union(s.index)
)

expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
result = s.str.cat([t, u], join=join, na_rep="-")
Expand Down