From 3698aa76261ab0d19c0d08f221eb604e5ce19536 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 16 Sep 2022 10:53:21 -0700 Subject: [PATCH 1/3] accept both str and tuple[str, ...] in series.str.(starts|ends)with also add type hints and update doc strings to note pat accepts tuple --- pandas/core/strings/accessor.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index f9852005314a4..89f54304a1040 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -6,6 +6,7 @@ import re from typing import ( TYPE_CHECKING, + Any, Callable, Hashable, Iterator, @@ -2288,7 +2289,7 @@ def count(self, pat, flags=0): return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def startswith(self, pat, na=None): + def startswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index: """ Test if the start of each string element matches a pattern. @@ -2296,8 +2297,9 @@ def startswith(self, pat, na=None): Parameters ---------- - pat : str - Character sequence. Regular expressions are not accepted. + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. na : object, default NaN Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. @@ -2341,14 +2343,14 @@ def startswith(self, pat, na=None): 3 False dtype: bool """ - if not isinstance(pat, str): - msg = f"expected a string object, not {type(pat).__name__}" + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" raise TypeError(msg) result = self._data.array._str_startswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def endswith(self, pat, na=None): + def endswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index: """ Test if the end of each string element matches a pattern. @@ -2356,8 +2358,9 @@ def endswith(self, pat, na=None): Parameters ---------- - pat : str - Character sequence. Regular expressions are not accepted. + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not + accepted. na : object, default NaN Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. @@ -2401,8 +2404,8 @@ def endswith(self, pat, na=None): 3 False dtype: bool """ - if not isinstance(pat, str): - msg = f"expected a string object, not {type(pat).__name__}" + if not isinstance(pat, (str, tuple)): + msg = f"expected a string or tuple, not {type(pat).__name__}" raise TypeError(msg) result = self._data.array._str_endswith(pat, na=na) return self._wrap_result(result, returns_string=False) From 7d0de51cbc2ce424e0b37f0c0a011dc0159bdfb1 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 16 Sep 2022 10:54:54 -0700 Subject: [PATCH 2/3] parametrize test_startswith() and test_endswith() to include pat as tuple --- pandas/tests/strings/test_find_replace.py | 14 ++++++++------ pandas/tests/strings/test_strings.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 1c74950e30c40..62f9478bf25ff 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype): # -------------------------------------------------------------------------------------- +@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) -def test_startswith(dtype, null_value, na): +def test_startswith(pat, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype, ) - result = values.str.startswith("foo") + result = values.str.startswith(pat) exp = Series([False, np.nan, True, False, False, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.startswith("foo", na=na) + result = values.str.startswith(pat, na=na) exp = Series([False, na, True, False, False, na, True]) tm.assert_series_equal(result, exp) @@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- +@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")]) @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) -def test_endswith(dtype, null_value, na): +def test_endswith(pat, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype, ) - result = values.str.endswith("foo") + result = values.str.endswith(pat) exp = Series([False, np.nan, False, False, True, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.endswith("foo", na=na) + result = values.str.endswith(pat, na=na) exp = Series([False, na, False, False, True, na, True]) tm.assert_series_equal(result, exp) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index ffa8b557d2379..4b25752940418 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -26,7 +26,7 @@ def test_startswith_endswith_non_str_patterns(pattern): # GH3485 ser = Series(["foo", "bar"]) - msg = f"expected a string object, not {type(pattern).__name__}" + msg = f"expected a string or tuple, not {type(pattern).__name__}" with pytest.raises(TypeError, match=msg): ser.str.startswith(pattern) with pytest.raises(TypeError, match=msg): From 003ae98e9c52410864c150349ba85038469fb32a Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 16 Sep 2022 13:16:00 -0700 Subject: [PATCH 3/3] change na type hint to Scalar | None + add tuple usage examples --- pandas/core/strings/accessor.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 89f54304a1040..46628eb3e17dd 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -6,7 +6,6 @@ import re from typing import ( TYPE_CHECKING, - Any, Callable, Hashable, Iterator, @@ -20,6 +19,7 @@ from pandas._typing import ( DtypeObj, F, + Scalar, ) from pandas.util._decorators import ( Appender, @@ -2289,7 +2289,9 @@ def count(self, pat, flags=0): return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def startswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index: + def startswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: """ Test if the start of each string element matches a pattern. @@ -2334,6 +2336,13 @@ def startswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Ind 3 NaN dtype: object + >>> s.str.startswith(('b', 'B')) + 0 True + 1 True + 2 False + 3 NaN + dtype: object + Specifying `na` to be `False` instead of `NaN`. >>> s.str.startswith('b', na=False) @@ -2350,7 +2359,9 @@ def startswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Ind return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) - def endswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index: + def endswith( + self, pat: str | tuple[str, ...], na: Scalar | None = None + ) -> Series | Index: """ Test if the end of each string element matches a pattern. @@ -2395,6 +2406,13 @@ def endswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index 3 NaN dtype: object + >>> s.str.endswith(('t', 'T')) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + Specifying `na` to be `False` instead of `NaN`. >>> s.str.endswith('t', na=False)