Skip to content

Commit

Permalink
Backport PR #48587 on branch 1.5.x (Fix `series.str.startswith(tuple)`) (#48593)
Browse files Browse the repository at this point in the history

Backport PR #48587: Fix `series.str.startswith(tuple)`

Co-authored-by: Janosh Riebesell <janosh.riebesell@gmail.com>
  • Loading branch information
meeseeksmachine and janosh committed Sep 17, 2022
1 parent dfc00bf commit aabf659
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 17 deletions.
41 changes: 31 additions & 10 deletions pandas/core/strings/accessor.py
Expand Up @@ -18,6 +18,7 @@
from pandas._typing import (
DtypeObj,
F,
Scalar,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -2287,16 +2288,19 @@ def count(self, pat, flags=0):
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def startswith(self, pat, na=None):
def startswith(
self, pat: str | tuple[str, ...], na: Scalar | None = None
) -> Series | Index:
"""
Test if the start of each string element matches a pattern.
Equivalent to :meth:`str.startswith`.
Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2331,6 +2335,13 @@ def startswith(self, pat, na=None):
3 NaN
dtype: object
>>> s.str.startswith(('b', 'B'))
0 True
1 True
2 False
3 NaN
dtype: object
Specifying `na` to be `False` instead of `NaN`.
>>> s.str.startswith('b', na=False)
Expand All @@ -2340,23 +2351,26 @@ def startswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_startswith(pat, na=na)
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def endswith(self, pat, na=None):
def endswith(
self, pat: str | tuple[str, ...], na: Scalar | None = None
) -> Series | Index:
"""
Test if the end of each string element matches a pattern.
Equivalent to :meth:`str.endswith`.
Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2391,6 +2405,13 @@ def endswith(self, pat, na=None):
3 NaN
dtype: object
>>> s.str.endswith(('t', 'T'))
0 True
1 False
2 True
3 NaN
dtype: object
Specifying `na` to be `False` instead of `NaN`.
>>> s.str.endswith('t', na=False)
Expand All @@ -2400,8 +2421,8 @@ def endswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_endswith(pat, na=na)
return self._wrap_result(result, returns_string=False)
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/strings/test_find_replace.py
Expand Up @@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_startswith(dtype, null_value, na):
def test_startswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.startswith("foo")
result = values.str.startswith(pat)
exp = Series([False, np.nan, True, False, False, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.startswith("foo", na=na)
result = values.str.startswith(pat, na=na)
exp = Series([False, na, True, False, False, na, True])
tm.assert_series_equal(result, exp)

Expand Down Expand Up @@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_endswith(dtype, null_value, na):
def test_endswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.endswith("foo")
result = values.str.endswith(pat)
exp = Series([False, np.nan, False, False, True, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.endswith("foo", na=na)
result = values.str.endswith(pat, na=na)
exp = Series([False, na, False, False, True, na, True])
tm.assert_series_equal(result, exp)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/strings/test_strings.py
Expand Up @@ -26,7 +26,7 @@
def test_startswith_endswith_non_str_patterns(pattern):
# GH3485
ser = Series(["foo", "bar"])
msg = f"expected a string object, not {type(pattern).__name__}"
msg = f"expected a string or tuple, not {type(pattern).__name__}"
with pytest.raises(TypeError, match=msg):
ser.str.startswith(pattern)
with pytest.raises(TypeError, match=msg):
Expand Down

0 comments on commit aabf659

Please sign in to comment.