From bfe734fe8043b53539a8dc38eef41f13f7a66fc9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Mar 2021 17:51:28 +0100 Subject: [PATCH] PERF: optimize is_numeric_v_string_like (#40501) --- pandas/core/dtypes/common.py | 53 ++++-------------------------- pandas/core/dtypes/missing.py | 3 +- pandas/tests/dtypes/test_common.py | 13 +------- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7a2d6468f1b63..32ea82d9c0402 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1100,7 +1100,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: # This exists to silence numpy deprecation warnings, see GH#29553 -def is_numeric_v_string_like(a, b): +def is_numeric_v_string_like(a: ArrayLike, b): """ Check if we are comparing a string-like object to a numeric ndarray. NumPy doesn't like to compare such objects, especially numeric arrays @@ -1108,7 +1108,7 @@ def is_numeric_v_string_like(a, b): Parameters ---------- - a : array-like, scalar + a : array-like The first object to check. b : array-like, scalar The second object to check. @@ -1120,16 +1120,8 @@ def is_numeric_v_string_like(a, b): Examples -------- - >>> is_numeric_v_string_like(1, 1) - False - >>> is_numeric_v_string_like("foo", "foo") - False - >>> is_numeric_v_string_like(1, "foo") # non-array numeric - False >>> is_numeric_v_string_like(np.array([1]), "foo") True - >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check - True >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) True >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) @@ -1142,17 +1134,15 @@ def is_numeric_v_string_like(a, b): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - is_a_numeric_array = is_a_array and is_numeric_dtype(a) - is_b_numeric_array = is_b_array and is_numeric_dtype(b) - is_a_string_array = is_a_array and is_string_like_dtype(a) - is_b_string_array = is_b_array and is_string_like_dtype(b) + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") - is_a_scalar_string_like = not is_a_array and isinstance(a, str) is_b_scalar_string_like = not is_b_array and isinstance(b, str) return ( (is_a_numeric_array and is_b_scalar_string_like) - or (is_b_numeric_array and is_a_scalar_string_like) or (is_a_numeric_array and is_b_string_array) or (is_b_numeric_array and is_a_string_array) ) @@ -1305,37 +1295,6 @@ def is_numeric_dtype(arr_or_dtype) -> bool: ) -def is_string_like_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of a string-like dtype. - - Unlike `is_string_dtype`, the object dtype is excluded because it - is a mixed dtype. - - Parameters - ---------- - arr_or_dtype : array-like - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of the string dtype. - - Examples - -------- - >>> is_string_like_dtype(str) - True - >>> is_string_like_dtype(object) - False - >>> is_string_like_dtype(np.array(['a', 'b'])) - True - >>> is_string_like_dtype(pd.Series([1, 2])) - False - """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U")) - - def is_float_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a float dtype. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 59d6f9a51ed43..8c2cff21c114e 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -35,7 +35,6 @@ is_object_dtype, is_scalar, is_string_dtype, - is_string_like_dtype, needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -258,7 +257,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray: dtype = values.dtype shape = values.shape - if is_string_like_dtype(dtype): + if dtype.kind in ("S", "U"): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 248798408381e..406aec9d4c16e 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -469,14 +469,11 @@ def test_is_datetime_or_timedelta_dtype(): def test_is_numeric_v_string_like(): - assert not com.is_numeric_v_string_like(1, 1) - assert not com.is_numeric_v_string_like(1, "foo") - assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), 1) assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array([1]), "foo") - assert com.is_numeric_v_string_like("foo", np.array([1])) assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) @@ -521,14 +518,6 @@ def test_is_numeric_dtype(): assert com.is_numeric_dtype(pd.Index([1, 2.0])) -def test_is_string_like_dtype(): - assert not com.is_string_like_dtype(object) - assert not com.is_string_like_dtype(pd.Series([1, 2])) - - assert com.is_string_like_dtype(str) - assert com.is_string_like_dtype(np.array(["a", "b"])) - - def test_is_float_dtype(): assert not com.is_float_dtype(str) assert not com.is_float_dtype(int)