From aabf6597f45436e9ada915ac15d3708f9d4948ca Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 17 Sep 2022 16:24:16 +0200 Subject: [PATCH] Backport PR #48587 on branch 1.5.x (Fix `series.str.startswith(tuple)`) (#48593) Backport PR #48587: Fix `series.str.startswith(tuple)` Co-authored-by: Janosh Riebesell --- pandas/core/strings/accessor.py | 41 +++++++++++++++++------ pandas/tests/strings/test_find_replace.py | 14 ++++---- pandas/tests/strings/test_strings.py | 2 +- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 44ebfbd7f3e9c..0ee9f15c3630c 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -18,6 +18,7 @@ from pandas._typing import ( DtypeObj, F, + Scalar, ) from pandas.util._decorators import ( Appender, @@ -2287,7 +2288,9 @@ def count(self, pat, flags=0): return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def startswith(self, pat, na=None): + def startswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: """ Test if the start of each string element matches a pattern. @@ -2295,8 +2298,9 @@ def startswith(self, pat, na=None): Parameters ---------- - pat : str - Character sequence. Regular expressions are not accepted. + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. na : object, default NaN Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. @@ -2331,6 +2335,13 @@ def startswith(self, pat, na=None): 3 NaN dtype: object + >>> s.str.startswith(('b', 'B')) + 0 True + 1 True + 2 False + 3 NaN + dtype: object + Specifying `na` to be `False` instead of `NaN`. >>> s.str.startswith('b', na=False) @@ -2340,14 +2351,16 @@ def startswith(self, pat, na=None): 3 False dtype: bool """ - if not isinstance(pat, str): - msg = f"expected a string object, not {type(pat).__name__}" + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" raise TypeError(msg) result = self._data.array._str_startswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def endswith(self, pat, na=None): + def endswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: """ Test if the end of each string element matches a pattern. @@ -2355,8 +2368,9 @@ def endswith(self, pat, na=None): Parameters ---------- - pat : str - Character sequence. Regular expressions are not accepted. + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. na : object, default NaN Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. @@ -2391,6 +2405,13 @@ def endswith(self, pat, na=None): 3 NaN dtype: object + >>> s.str.endswith(('t', 'T')) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + Specifying `na` to be `False` instead of `NaN`. >>> s.str.endswith('t', na=False) @@ -2400,8 +2421,8 @@ def endswith(self, pat, na=None): 3 False dtype: bool """ - if not isinstance(pat, str): - msg = f"expected a string object, not {type(pat).__name__}" + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" raise TypeError(msg) result = self._data.array._str_endswith(pat, na=na) return self._wrap_result(result, returns_string=False) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 1c74950e30c40..62f9478bf25ff 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype): # -------------------------------------------------------------------------------------- +@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) -def test_startswith(dtype, null_value, na): +def test_startswith(pat, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype, ) - result = values.str.startswith("foo") + result = values.str.startswith(pat) exp = Series([False, np.nan, True, False, False, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.startswith("foo", na=na) + result = values.str.startswith(pat, na=na) exp = Series([False, na, True, False, False, na, True]) tm.assert_series_equal(result, exp) @@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- +@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) -def test_endswith(dtype, null_value, na): +def test_endswith(pat, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype, ) - result = values.str.endswith("foo") + result = values.str.endswith(pat) exp = Series([False, np.nan, False, False, True, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.endswith("foo", na=na) + result = values.str.endswith(pat, na=na) exp = Series([False, na, False, False, True, na, True]) tm.assert_series_equal(result, exp) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index ffa8b557d2379..4b25752940418 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -26,7 +26,7 @@ def test_startswith_endswith_non_str_patterns(pattern): # GH3485 ser = Series(["foo", "bar"]) - msg = f"expected a string object, not {type(pattern).__name__}" + msg = f"expected a string or tuple, not {type(pattern).__name__}" with pytest.raises(TypeError, match=msg): ser.str.startswith(pattern) with pytest.raises(TypeError, match=msg):