DEPR: Index.__and__, __or__, __xor__ behaving as set ops (#37374)

pandas-dev · Nov 2, 2020 · 821f90c
1 parent 462b21d
commit 821f90c
Show file tree

Hide file tree

Showing 16 changed files with 79 additions and 32 deletions.
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
@@ -1594,19 +1594,16 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
 Set operations on Index objects
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The two main operations are ``union (|)`` and ``intersection (&)``.
-These can be directly called as instance methods or used via overloaded
-operators. Difference is provided via the ``.difference()`` method.
+The two main operations are ``union`` and ``intersection``.
+Difference is provided via the ``.difference()`` method.
 
 .. ipython:: python
 
    a = pd.Index(['c', 'b', 'a'])
    b = pd.Index(['c', 'e', 'd'])
-   a | b
-   a & b
    a.difference(b)
 
-Also available is the ``symmetric_difference (^)`` operation, which returns elements
+Also available is the ``symmetric_difference`` operation, which returns elements
 that appear in either ``idx1`` or ``idx2``, but not in both. This is
 equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``,
 with duplicates dropped.
@@ -1616,7 +1613,6 @@ with duplicates dropped.
    idx1 = pd.Index([1, 2, 3, 4])
    idx2 = pd.Index([2, 3, 4, 5])
    idx1.symmetric_difference(idx2)
-   idx1 ^ idx2
 
 .. note::
 
@@ -1631,7 +1627,7 @@ integer values are converted to float
 
    idx1 = pd.Index([0, 1, 2])
    idx2 = pd.Index([0.5, 1.5])
-   idx1 | idx2
+   idx1.union(idx2)
 
 .. _indexing.missing:
 

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -466,7 +466,7 @@ at the new values.
    ser = pd.Series(np.sort(np.random.uniform(size=100)))
 
    # interpolate at new_index
-   new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
+   new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]))
    interp_s = ser.reindex(new_index).interpolate(method="pchip")
    interp_s[49:51]
 

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -338,6 +338,7 @@ Deprecations
 - Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
 - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
 - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
+- :class:`Index` operators ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior.  Use the named set methods instead (:issue:`36758`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2504,14 +2504,35 @@ def __iadd__(self, other):
 
     @final
     def __and__(self, other):
+        warnings.warn(
+            "Index.__and__ operating as a set operation is deprecated, "
+            "in the future this will be a logical operation matching "
+            "Series.__and__.  Use index.intersection(other) instead",
+            FutureWarning,
+            stacklevel=2,
+        )
         return self.intersection(other)
 
     @final
     def __or__(self, other):
+        warnings.warn(
+            "Index.__or__ operating as a set operation is deprecated, "
+            "in the future this will be a logical operation matching "
+            "Series.__or__.  Use index.union(other) instead",
+            FutureWarning,
+            stacklevel=2,
+        )
         return self.union(other)
 
     @final
     def __xor__(self, other):
+        warnings.warn(
+            "Index.__xor__ operating as a set operation is deprecated, "
+            "in the future this will be a logical operation matching "
+            "Series.__xor__.  Use index.symmetric_difference(other) instead",
+            FutureWarning,
+            stacklevel=2,
+        )
         return self.symmetric_difference(other)
 
     @final

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -3126,12 +3126,12 @@ def _convert_to_indexer(r) -> Int64Index:
                 r = r.nonzero()[0]
             return Int64Index(r)
 
-        def _update_indexer(idxr, indexer=indexer):
+        def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index:
             if indexer is None:
                 indexer = Index(np.arange(n))
             if idxr is None:
                 return indexer
-            return indexer & idxr
+            return indexer.intersection(idxr)
 
         for i, k in enumerate(seq):
 
@@ -3149,7 +3149,9 @@ def _update_indexer(idxr, indexer=indexer):
                         idxrs = _convert_to_indexer(
                             self._get_level_indexer(x, level=i, indexer=indexer)
                         )
-                        indexers = idxrs if indexers is None else indexers | idxrs
+                        indexers = (idxrs if indexers is None else indexers).union(
+                            idxrs
+                        )
                     except KeyError:
 
                         # ignore not founds

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -725,7 +725,7 @@ def __array_ufunc__(
             # it to handle *args.
             index = alignable[0].index
             for s in alignable[1:]:
-                index |= s.index
+                index = index.union(s.index)
             inputs = tuple(
                 x.reindex(index) if issubclass(t, Series) else x
                 for x, t in zip(inputs, types)

diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -475,10 +475,10 @@ def __init__(
         if cols is not None:
 
             # all missing, raise
-            if not len(Index(cols) & df.columns):
+            if not len(Index(cols).intersection(df.columns)):
                 raise KeyError("passes columns are not ALL present dataframe")
 
-            if len(Index(cols) & df.columns) != len(cols):
+            if len(Index(cols).intersection(df.columns)) != len(cols):
                 # Deprecated in GH#17295, enforced in 1.0.0
                 raise KeyError("Not all names specified in 'columns' are found")
 

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -268,7 +268,7 @@ def __init__(
         if (
             (obj.ndim == 1)
             and (obj.name in set(obj.index.names))
-            or len(obj.columns & obj.index.names)
+            or len(obj.columns.intersection(obj.index.names))
         ):
             msg = "Overlapping names between the index and columns"
             raise ValueError(msg)

diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
@@ -300,7 +300,8 @@ def test_intersection_bug_1708(self):
         index_1 = date_range("1/1/2012", periods=4, freq="12H")
         index_2 = index_1 + DateOffset(hours=1)
 
-        result = index_1 & index_2
+        with tm.assert_produces_warning(FutureWarning):
+            result = index_1 & index_2
         assert len(result) == 0
 
     @pytest.mark.parametrize("tz", tz)

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -105,11 +105,13 @@ def test_symmetric_difference(idx, sort):
 def test_multiindex_symmetric_difference():
     # GH 13490
     idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
-    result = idx ^ idx
+    with tm.assert_produces_warning(FutureWarning):
+        result = idx ^ idx
     assert result.names == idx.names
 
     idx2 = idx.copy().rename(["A", "B"])
-    result = idx ^ idx2
+    with tm.assert_produces_warning(FutureWarning):
+        result = idx ^ idx2
     assert result.names == [None, None]
 
 

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -1008,7 +1008,8 @@ def test_symmetric_difference(self, sort):
         tm.assert_index_equal(result, expected)
 
         # __xor__ syntax
-        expected = index1 ^ index2
+        with tm.assert_produces_warning(FutureWarning):
+            expected = index1 ^ index2
         assert tm.equalContents(result, expected)
         assert result.name is None
 

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -93,5 +93,18 @@ def test_union_dtypes(left, right, expected):
     right = pandas_dtype(right)
     a = pd.Index([], dtype=left)
     b = pd.Index([], dtype=right)
-    result = (a | b).dtype
+    result = a.union(b).dtype
     assert result == expected
+
+
+def test_dunder_inplace_setops_deprecated(index):
+    # GH#37374 these will become logical ops, not setops
+
+    with tm.assert_produces_warning(FutureWarning):
+        index |= index
+
+    with tm.assert_produces_warning(FutureWarning):
+        index &= index
+
+    with tm.assert_produces_warning(FutureWarning):
+        index ^= index
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -97,13 +97,15 @@ def test_intersection_bug_1708(self):
         index_1 = timedelta_range("1 day", periods=4, freq="h")
         index_2 = index_1 + pd.offsets.Hour(5)
 
-        result = index_1 & index_2
+        with tm.assert_produces_warning(FutureWarning):
+            result = index_1 & index_2
         assert len(result) == 0
 
         index_1 = timedelta_range("1 day", periods=4, freq="h")
         index_2 = index_1 + pd.offsets.Hour(1)
 
-        result = index_1 & index_2
+        with tm.assert_produces_warning(FutureWarning):
+            result = index_1 & index_2
         expected = timedelta_range("1 day 01:00:00", periods=3, freq="h")
         tm.assert_index_equal(result, expected)
         assert result.freq == expected.freq

diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
@@ -1112,9 +1112,9 @@ def test_resample_anchored_multiday():
     #
     # See: https://github.com/pandas-dev/pandas/issues/8683
 
-    index = pd.date_range(
-        "2014-10-14 23:06:23.206", periods=3, freq="400L"
-    ) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
+    index1 = pd.date_range("2014-10-14 23:06:23.206", periods=3, freq="400L")
+    index2 = pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
+    index = index1.union(index2)
 
     s = Series(np.random.randn(5), index=index)
 

diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
@@ -269,11 +269,13 @@ def test_reversed_xor_with_index_returns_index(self):
         idx2 = Index([1, 0, 1, 0])
 
         expected = Index.symmetric_difference(idx1, ser)
-        result = idx1 ^ ser
+        with tm.assert_produces_warning(FutureWarning):
+            result = idx1 ^ ser
         tm.assert_index_equal(result, expected)
 
         expected = Index.symmetric_difference(idx2, ser)
-        result = idx2 ^ ser
+        with tm.assert_produces_warning(FutureWarning):
+            result = idx2 ^ ser
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -304,11 +306,13 @@ def test_reversed_logical_op_with_index_returns_series(self, op):
         idx2 = Index([1, 0, 1, 0])
 
         expected = Series(op(idx1.values, ser.values))
-        result = op(ser, idx1)
+        with tm.assert_produces_warning(FutureWarning):
+            result = op(ser, idx1)
         tm.assert_series_equal(result, expected)
 
         expected = Series(op(idx2.values, ser.values))
-        result = op(ser, idx2)
+        with tm.assert_produces_warning(FutureWarning):
+            result = op(ser, idx2)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -324,7 +328,9 @@ def test_reverse_ops_with_index(self, op, expected):
         # multi-set Index ops are buggy, so let's avoid duplicates...
         ser = Series([True, False])
         idx = Index([False, True])
-        result = op(ser, idx)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # behaving as set ops is deprecated, will become logical ops
+            result = op(ser, idx)
         tm.assert_index_equal(result, expected)
 
     def test_logical_ops_label_based(self):

diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -645,7 +645,9 @@ def test_str_cat_align_mixed_inputs(self, join):
         u = np.array(["A", "B", "C", "D"])
         expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
         # joint index of rhs [t, u]; u will be forced have index of s
-        rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
+        rhs_idx = (
+            t.index.intersection(s.index) if join == "inner" else t.index.union(s.index)
+        )
 
         expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
         result = s.str.cat([t, u], join=join, na_rep="-")