Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix series.str.startswith(tuple) #48587

Merged
merged 3 commits on Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 13 additions & 10 deletions pandas/core/strings/accessor.py
Expand Up @@ -6,6 +6,7 @@
import re
from typing import (
TYPE_CHECKING,
Any,
Callable,
Hashable,
Iterator,
Expand Down Expand Up @@ -2288,16 +2289,17 @@ def count(self, pat, flags=0):
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def startswith(self, pat, na=None):
def startswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index:
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
"""
Test if the start of each string element matches a pattern.

Equivalent to :meth:`str.startswith`.

Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2341,23 +2343,24 @@ def startswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_startswith(pat, na=na)
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def endswith(self, pat, na=None):
def endswith(self, pat: str | tuple[str, ...], na: Any = None) -> Series | Index:
"""
Test if the end of each string element matches a pattern.

Equivalent to :meth:`str.endswith`.

Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2401,8 +2404,8 @@ def endswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_endswith(pat, na=na)
return self._wrap_result(result, returns_string=False)
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/strings/test_find_replace.py
Expand Up @@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_startswith(dtype, null_value, na):
def test_startswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.startswith("foo")
result = values.str.startswith(pat)
exp = Series([False, np.nan, True, False, False, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.startswith("foo", na=na)
result = values.str.startswith(pat, na=na)
exp = Series([False, na, True, False, False, na, True])
tm.assert_series_equal(result, exp)

Expand Down Expand Up @@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_endswith(dtype, null_value, na):
def test_endswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.endswith("foo")
result = values.str.endswith(pat)
exp = Series([False, np.nan, False, False, True, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.endswith("foo", na=na)
result = values.str.endswith(pat, na=na)
exp = Series([False, na, False, False, True, na, True])
tm.assert_series_equal(result, exp)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/strings/test_strings.py
Expand Up @@ -26,7 +26,7 @@
def test_startswith_endswith_non_str_patterns(pattern):
# GH3485
ser = Series(["foo", "bar"])
msg = f"expected a string object, not {type(pattern).__name__}"
msg = f"expected a string or tuple, not {type(pattern).__name__}"
with pytest.raises(TypeError, match=msg):
ser.str.startswith(pattern)
with pytest.raises(TypeError, match=msg):
Expand Down