Skip to content

Commit

Permalink
CLN: Stopped casting values in Series/Index.isin for datelike dtypes (#…
Browse files Browse the repository at this point in the history
…58645)

* CLN: Stopped casting values in Series/Index.isin for datelike dtypes

* Support mixed case

* Avoid infer_objects
  • Loading branch information
mroeschke committed May 17, 2024
1 parent 51f891e commit 810fe4f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 42 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Removal of prior version deprecations/changes
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
- Stopped performing dtype inference when setting an :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
Expand Down
39 changes: 5 additions & 34 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,14 +759,6 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
values = ensure_wrapped_if_datetimelike(values)

if not isinstance(values, type(self)):
inferable = [
"timedelta",
"timedelta64",
"datetime",
"datetime64",
"date",
"period",
]
if values.dtype == object:
values = lib.maybe_convert_objects(
values, # type: ignore[arg-type]
Expand All @@ -775,32 +767,11 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
)
if values.dtype != object:
return self.isin(values)

inferred = lib.infer_dtype(values, skipna=False)
if inferred not in inferable:
if inferred == "string":
pass

elif "mixed" in inferred:
return isin(self.astype(object), values)
else:
return np.zeros(self.shape, dtype=bool)

try:
values = type(self)._from_sequence(values)
except ValueError:
return isin(self.astype(object), values)
else:
warnings.warn(
# GH#53111
f"The behavior of 'isin' with dtype={self.dtype} and "
"castable values (e.g. strings) is deprecated. In a "
"future version, these will not be considered matching "
"by isin. Explicitly cast to the appropriate dtype before "
"calling isin instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
# TODO: Deprecate this case
# https://github.com/pandas-dev/pandas/pull/58645/files#r1604055791
return isin(self.astype(object), values)
return np.zeros(self.shape, dtype=bool)

if self.dtype.kind in "mM":
self = cast("DatetimeArray | TimedeltaArray", self)
Expand Down
13 changes: 5 additions & 8 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,21 +990,18 @@ def test_isin_datetimelike_all_nat(self, dtype):
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"])
def test_isin_datetimelike_strings_deprecated(self, dtype):
def test_isin_datetimelike_strings_returns_false(self, dtype):
# GH#53111
dta = date_range("2013-01-01", periods=3)._values
arr = Series(dta.view("i8")).array.view(dtype)

vals = [str(x) for x in arr]
msg = "The behavior of 'isin' with dtype=.* is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = algos.isin(arr, vals)
assert res.all()
res = algos.isin(arr, vals)
assert not res.any()

vals2 = np.array(vals, dtype=str)
with tm.assert_produces_warning(FutureWarning, match=msg):
res2 = algos.isin(arr, vals2)
assert res2.all()
res2 = algos.isin(arr, vals2)
assert not res2.any()

def test_isin_dt64tz_with_nat(self):
# the all-NaT values used to get inferred to tznaive, which was evaluated
Expand Down

0 comments on commit 810fe4f

Please sign in to comment.