Skip to content

Commit

Permalink
Fix series.str.startswith(tuple) (pandas-dev#48587)
Browse files Browse the repository at this point in the history
* accept both str and tuple[str, ...] in Series.str.startswith and Series.str.endswith

Also add type hints and update the docstrings to note that pat accepts a tuple.

* parametrize test_startswith() and test_endswith() to include pat as tuple

* change na type hint to Scalar | None + add tuple usage examples
  • Loading branch information
janosh authored and noatamir committed Nov 9, 2022
1 parent f651a14 commit 7449053
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 17 deletions.
41 changes: 31 additions & 10 deletions pandas/core/strings/accessor.py
Expand Up @@ -19,6 +19,7 @@
from pandas._typing import (
DtypeObj,
F,
Scalar,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -2288,16 +2289,19 @@ def count(self, pat, flags=0):
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def startswith(self, pat, na=None):
def startswith(
self, pat: str | tuple[str, ...], na: Scalar | None = None
) -> Series | Index:
"""
Test if the start of each string element matches a pattern.
Equivalent to :meth:`str.startswith`.
Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2332,6 +2336,13 @@ def startswith(self, pat, na=None):
3 NaN
dtype: object
>>> s.str.startswith(('b', 'B'))
0 True
1 True
2 False
3 NaN
dtype: object
Specifying `na` to be `False` instead of `NaN`.
>>> s.str.startswith('b', na=False)
Expand All @@ -2341,23 +2352,26 @@ def startswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_startswith(pat, na=na)
return self._wrap_result(result, returns_string=False)

@forbid_nonstring_types(["bytes"])
def endswith(self, pat, na=None):
def endswith(
self, pat: str | tuple[str, ...], na: Scalar | None = None
) -> Series | Index:
"""
Test if the end of each string element matches a pattern.
Equivalent to :meth:`str.endswith`.
Parameters
----------
pat : str
Character sequence. Regular expressions are not accepted.
pat : str or tuple[str, ...]
Character sequence or tuple of strings. Regular expressions are not
accepted.
na : object, default NaN
Object shown if element tested is not a string. The default depends
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
Expand Down Expand Up @@ -2392,6 +2406,13 @@ def endswith(self, pat, na=None):
3 NaN
dtype: object
>>> s.str.endswith(('t', 'T'))
0 True
1 False
2 True
3 NaN
dtype: object
Specifying `na` to be `False` instead of `NaN`.
>>> s.str.endswith('t', na=False)
Expand All @@ -2401,8 +2422,8 @@ def endswith(self, pat, na=None):
3 False
dtype: bool
"""
if not isinstance(pat, str):
msg = f"expected a string object, not {type(pat).__name__}"
if not isinstance(pat, (str, tuple)):
msg = f"expected a string or tuple, not {type(pat).__name__}"
raise TypeError(msg)
result = self._data.array._str_endswith(pat, na=na)
return self._wrap_result(result, returns_string=False)
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/strings/test_find_replace.py
Expand Up @@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_startswith(dtype, null_value, na):
def test_startswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.startswith("foo")
result = values.str.startswith(pat)
exp = Series([False, np.nan, True, False, False, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.startswith("foo", na=na)
result = values.str.startswith(pat, na=na)
exp = Series([False, na, True, False, False, na, True])
tm.assert_series_equal(result, exp)

Expand Down Expand Up @@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", [None, "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@pytest.mark.parametrize("na", [True, False])
def test_endswith(dtype, null_value, na):
def test_endswith(pat, dtype, null_value, na):
# add category dtype parametrizations for GH-36241
values = Series(
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
dtype=dtype,
)

result = values.str.endswith("foo")
result = values.str.endswith(pat)
exp = Series([False, np.nan, False, False, True, np.nan, True])
tm.assert_series_equal(result, exp)

result = values.str.endswith("foo", na=na)
result = values.str.endswith(pat, na=na)
exp = Series([False, na, False, False, True, na, True])
tm.assert_series_equal(result, exp)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/strings/test_strings.py
Expand Up @@ -26,7 +26,7 @@
def test_startswith_endswith_non_str_patterns(pattern):
# GH3485
ser = Series(["foo", "bar"])
msg = f"expected a string object, not {type(pattern).__name__}"
msg = f"expected a string or tuple, not {type(pattern).__name__}"
with pytest.raises(TypeError, match=msg):
ser.str.startswith(pattern)
with pytest.raises(TypeError, match=msg):
Expand Down

0 comments on commit 7449053

Please sign in to comment.