DEPR: Change str.replace(regex) from True to False & single behavior (p…

…andas-dev#49486) * DEPR: Change str.replace(regex) from True to False & single behavior * Add versionchanged
phofl · Nov 9, 2022 · af0da59 · af0da59
1 parent 3f62b90
commit af0da59
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 70 deletions.
diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
@@ -267,14 +267,16 @@ i.e., from the end of the string to the beginning of the string:
    s3
    s3.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
 
-.. warning::
 
-    Some caution must be taken when dealing with regular expressions! The current behavior
-    is to treat single character patterns as literal strings, even when ``regex`` is set
-    to ``True``. This behavior is deprecated and will be removed in a future version so
-    that the ``regex`` keyword is always respected.
+.. versionchanged:: 2.0
+
+Single-character patterns with ``regex=True`` are also treated as regular expressions:
+
+.. ipython:: python
 
-.. versionchanged:: 1.2.0
+   s4 = pd.Series(["a.b", ".", "b", np.nan, ""], dtype="string")
+   s4
+   s4.str.replace(".", "a", regex=True)
 
 If you want literal replacement of a string (equivalent to :meth:`str.replace`), you
 can set the optional ``regex`` parameter to ``False``, rather than escaping each

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -322,6 +322,7 @@ Removal of prior version deprecations/changes
 - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
 - Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
 - Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
+- Changed the default value of the ``regex`` argument of :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single-character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal (:issue:`36695`, :issue:`24804`)
 - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
 - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
 - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -1323,7 +1323,7 @@ def replace(
         n: int = -1,
         case: bool | None = None,
         flags: int = 0,
-        regex: bool | None = None,
+        regex: bool = False,
     ):
         r"""
         Replace each occurrence of pattern/regex in the Series/Index.
@@ -1351,16 +1351,14 @@ def replace(
         flags : int, default 0 (no flags)
             Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled
             regex.
-        regex : bool, default True
+        regex : bool, default False
             Determines if the passed-in pattern is a regular expression:
 
             - If True, assumes the passed-in pattern is a regular expression.
             - If False, treats the pattern as a literal string
             - Cannot be set to False if `pat` is a compiled regex or `repl` is
               a callable.
 
-            .. versionadded:: 0.23.0
-
         Returns
         -------
         Series or Index of object
@@ -1444,20 +1442,6 @@ def replace(
         2    NaN
         dtype: object
         """
-        if regex is None:
-            if isinstance(pat, str) and any(c in pat for c in ".+*|^$?[](){}\\"):
-                # warn only in cases where regex behavior would differ from literal
-                msg = (
-                    "The default value of regex will change from True to False "
-                    "in a future version."
-                )
-                if len(pat) == 1:
-                    msg += (
-                        " In addition, single character regular expressions will "
-                        "*not* be treated as literal strings when regex=True."
-                    )
-                warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
-
         # Check whether repl is valid (GH 13438, GH 15055)
         if not (isinstance(repl, str) or callable(repl)):
             raise TypeError("repl must be a string or callable")
@@ -1476,14 +1460,6 @@ def replace(
         elif callable(repl):
             raise ValueError("Cannot use a callable replacement when regex=False")
 
-        # The current behavior is to treat single character patterns as literal strings,
-        # even when ``regex`` is set to ``True``.
-        if isinstance(pat, str) and len(pat) == 1:
-            regex = False
-
-        if regex is None:
-            regex = True
-
         if case is None:
             case = True
 

diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -423,7 +423,7 @@ def test_replace_callable_raises(any_string_dtype, repl):
         with tm.maybe_produces_warning(
             PerformanceWarning, any_string_dtype == "string[pyarrow]"
         ):
-            values.str.replace("a", repl)
+            values.str.replace("a", repl, regex=True)
 
 
 def test_replace_callable_named_groups(any_string_dtype):
@@ -477,7 +477,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
     with tm.maybe_produces_warning(
         PerformanceWarning, any_string_dtype == "string[pyarrow]"
     ):
-        result = ser.str.replace(pat, ", ")
+        result = ser.str.replace(pat, ", ", regex=True)
     tm.assert_series_equal(result, expected)
 
 
@@ -490,13 +490,13 @@ def test_replace_compiled_regex_raises(any_string_dtype):
     msg = "case and flags cannot be set when pat is a compiled regex"
 
     with pytest.raises(ValueError, match=msg):
-        ser.str.replace(pat, "", flags=re.IGNORECASE)
+        ser.str.replace(pat, "", flags=re.IGNORECASE, regex=True)
 
     with pytest.raises(ValueError, match=msg):
-        ser.str.replace(pat, "", case=False)
+        ser.str.replace(pat, "", case=False, regex=True)
 
     with pytest.raises(ValueError, match=msg):
-        ser.str.replace(pat, "", case=True)
+        ser.str.replace(pat, "", case=True, regex=True)
 
 
 def test_replace_compiled_regex_callable(any_string_dtype):
@@ -507,7 +507,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
     with tm.maybe_produces_warning(
         PerformanceWarning, any_string_dtype == "string[pyarrow]"
     ):
-        result = ser.str.replace(pat, repl, n=2)
+        result = ser.str.replace(pat, repl, n=2, regex=True)
     expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -617,48 +617,25 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
-def test_replace_regex_default_warning(any_string_dtype):
+def test_replace_regex(any_string_dtype):
     # https://github.com/pandas-dev/pandas/pull/24809
     s = Series(["a", "b", "ac", np.nan, ""], dtype=any_string_dtype)
-    msg = (
-        "The default value of regex will change from True to False in a "
-        "future version\\.$"
-    )
-
-    with tm.assert_produces_warning(
-        FutureWarning,
-        match=msg,
-        raise_on_extra_warnings=any_string_dtype != "string[pyarrow]",
-    ):
-        result = s.str.replace("^.$", "a")
+    result = s.str.replace("^.$", "a", regex=True)
     expected = Series(["a", "a", "ac", np.nan, ""], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("regex", [True, False, None])
+@pytest.mark.parametrize("regex", [True, False])
 def test_replace_regex_single_character(regex, any_string_dtype):
-    # https://github.com/pandas-dev/pandas/pull/24809
-
-    # The current behavior is to treat single character patterns as literal strings,
-    # even when ``regex`` is set to ``True``.
-
+    # https://github.com/pandas-dev/pandas/pull/24809, enforced in 2.0
+    # GH 24804
     s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype)
 
-    if regex is None:
-        msg = re.escape(
-            "The default value of regex will change from True to False in a future "
-            "version. In addition, single character regular expressions will *not* "
-            "be treated as literal strings when regex=True."
-        )
-        with tm.assert_produces_warning(
-            FutureWarning,
-            match=msg,
-        ):
-            result = s.str.replace(".", "a", regex=regex)
+    result = s.str.replace(".", "a", regex=regex)
+    if regex:
+        expected = Series(["aaa", "a", "a", np.nan, ""], dtype=any_string_dtype)
     else:
-        result = s.str.replace(".", "a", regex=regex)
-
-    expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
+        expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)