Skip to content

Commit

Permalink
PERF: optimize is_numeric_v_string_like (#40501)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Mar 19, 2021
1 parent bbe34fc commit bfe734f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 61 deletions.
53 changes: 6 additions & 47 deletions pandas/core/dtypes/common.py
Expand Up @@ -1100,15 +1100,15 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:


# This exists to silence numpy deprecation warnings, see GH#29553
def is_numeric_v_string_like(a, b):
def is_numeric_v_string_like(a: ArrayLike, b):
"""
Check if we are comparing a string-like object to a numeric ndarray.
NumPy doesn't like to compare such objects, especially numeric arrays
and scalar string-likes.
Parameters
----------
a : array-like, scalar
a : array-like
The first object to check.
b : array-like, scalar
The second object to check.
Expand All @@ -1120,16 +1120,8 @@ def is_numeric_v_string_like(a, b):
Examples
--------
>>> is_numeric_v_string_like(1, 1)
False
>>> is_numeric_v_string_like("foo", "foo")
False
>>> is_numeric_v_string_like(1, "foo") # non-array numeric
False
>>> is_numeric_v_string_like(np.array([1]), "foo")
True
>>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check
True
>>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
True
>>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
Expand All @@ -1142,17 +1134,15 @@ def is_numeric_v_string_like(a, b):
is_a_array = isinstance(a, np.ndarray)
is_b_array = isinstance(b, np.ndarray)

is_a_numeric_array = is_a_array and is_numeric_dtype(a)
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
is_a_string_array = is_a_array and is_string_like_dtype(a)
is_b_string_array = is_b_array and is_string_like_dtype(b)
is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b")
is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b")
is_a_string_array = is_a_array and a.dtype.kind in ("S", "U")
is_b_string_array = is_b_array and b.dtype.kind in ("S", "U")

is_a_scalar_string_like = not is_a_array and isinstance(a, str)
is_b_scalar_string_like = not is_b_array and isinstance(b, str)

return (
(is_a_numeric_array and is_b_scalar_string_like)
or (is_b_numeric_array and is_a_scalar_string_like)
or (is_a_numeric_array and is_b_string_array)
or (is_b_numeric_array and is_a_string_array)
)
Expand Down Expand Up @@ -1305,37 +1295,6 @@ def is_numeric_dtype(arr_or_dtype) -> bool:
)


def is_string_like_dtype(arr_or_dtype) -> bool:
    """
    Report whether an array or dtype has a string-like dtype.

    A dtype counts as string-like here when its ``kind`` is ``"S"`` or
    ``"U"``. In contrast to `is_string_dtype`, the object dtype is not
    accepted, because it is a mixed dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_like_dtype(str)
    True
    >>> is_string_like_dtype(object)
    False
    >>> is_string_like_dtype(np.array(['a', 'b']))
    True
    >>> is_string_like_dtype(pd.Series([1, 2]))
    False
    """

    def _has_string_kind(dtype) -> bool:
        # Membership test on the dtype's one-character kind code.
        return dtype.kind in ("S", "U")

    # `_is_dtype` receives the input together with the predicate to apply.
    return _is_dtype(arr_or_dtype, _has_string_kind)


def is_float_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of a float dtype.
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/dtypes/missing.py
Expand Up @@ -35,7 +35,6 @@
is_object_dtype,
is_scalar,
is_string_dtype,
is_string_like_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
Expand Down Expand Up @@ -258,7 +257,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
dtype = values.dtype
shape = values.shape

if is_string_like_dtype(dtype):
if dtype.kind in ("S", "U"):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
Expand Down
13 changes: 1 addition & 12 deletions pandas/tests/dtypes/test_common.py
Expand Up @@ -469,14 +469,11 @@ def test_is_datetime_or_timedelta_dtype():


def test_is_numeric_v_string_like():
    """Check ``com.is_numeric_v_string_like`` on scalar and ndarray pairs."""
    # Argument pairs for which the check must report False.
    expected_false = (
        (1, 1),
        (1, "foo"),
        ("foo", "foo"),
        (np.array([1]), 1),
        (np.array([1]), np.array([2])),
        (np.array(["foo"]), np.array(["foo"])),
    )
    for left, right in expected_false:
        assert not com.is_numeric_v_string_like(left, right)

    # Argument pairs for which the check must report True; each combination
    # is listed in both argument orders.
    expected_true = (
        (np.array([1]), "foo"),
        ("foo", np.array([1])),
        (np.array([1, 2]), np.array(["foo"])),
        (np.array(["foo"]), np.array([1, 2])),
    )
    for left, right in expected_true:
        assert com.is_numeric_v_string_like(left, right)

Expand Down Expand Up @@ -521,14 +518,6 @@ def test_is_numeric_dtype():
assert com.is_numeric_dtype(pd.Index([1, 2.0]))


def test_is_string_like_dtype():
    """Check ``com.is_string_like_dtype`` on rejected and accepted inputs."""
    # Inputs the check must reject: the object type and an integer Series.
    for rejected in (object, pd.Series([1, 2])):
        assert not com.is_string_like_dtype(rejected)

    # Inputs the check must accept: the str type and an array of strings.
    for accepted in (str, np.array(["a", "b"])):
        assert com.is_string_like_dtype(accepted)


def test_is_float_dtype():
assert not com.is_float_dtype(str)
assert not com.is_float_dtype(int)
Expand Down

0 comments on commit bfe734f

Please sign in to comment.