Regression fix: leave R prefixes capitalization alone

`black.strings.get_string_prefix` used to lowercase the extracted prefix before returning it. This is wrong because 1) it ignores the fact we should leave R prefixes alone because of MagicPython, and 2) there is dedicated prefix casing handling code that fixes issue 1. `.lower` is too naive. This was originally fixed in 20.8b0, but was reintroduced since 21.4b0. I also added proper prefix normalization for docstrings by using the `black.strings.normalize_string_prefix` helper. Some more test strings were added to make sure strings with capitalized prefixes aren't treated differently (actually happened with my original patch, Jelle had to point it out to me).
psf · Jun 4, 2021 · c2ce5cf · c2ce5cf
1 parent c53b3ad
commit c2ce5cf
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 8 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -8,6 +8,7 @@
 - Fixed option usage when using the `--code` flag (#2259)
 - Do not call `uvloop.install()` when _Black_ is used as a library (#2303)
 - Added `--required-version` option to require a specific version to be running (#2300)
+- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
 
 ## 21.5b2
 

diff --git a/src/black/linegen.py b/src/black/linegen.py
@@ -226,8 +226,9 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
         if is_docstring(leaf) and "\\\n" not in leaf.value:
             # We're ignoring docstrings with backslash newline escapes because changing
             # indentation of those changes the AST representation of the code.
-            prefix = get_string_prefix(leaf.value)
-            docstring = leaf.value[len(prefix) :]  # Remove the prefix
+            docstring = normalize_string_prefix(leaf.value, self.remove_u_prefix)
+            prefix = get_string_prefix(docstring)
+            docstring = docstring[len(prefix) :]  # Remove the prefix
             quote_char = docstring[0]
             # A natural way to remove the outer quotes is to do:
             #   docstring = docstring.strip(quote_char)

diff --git a/src/black/strings.py b/src/black/strings.py
@@ -87,7 +87,7 @@ def get_string_prefix(string: str) -> str:
     prefix = ""
     prefix_idx = 0
     while string[prefix_idx] in STRING_PREFIX_CHARS:
-        prefix += string[prefix_idx].lower()
+        prefix += string[prefix_idx]
         prefix_idx += 1
 
     return prefix

diff --git a/src/black/trans.py b/src/black/trans.py
@@ -411,7 +411,7 @@ def make_naked(string: str, string_prefix: str) -> str:
             and is_valid_index(next_str_idx)
             and LL[next_str_idx].type == token.STRING
         ):
-            prefix = get_string_prefix(LL[next_str_idx].value)
+            prefix = get_string_prefix(LL[next_str_idx].value).lower()
             next_str_idx += 1
 
         # The next loop merges the string group. The final string will be
@@ -431,7 +431,7 @@ def make_naked(string: str, string_prefix: str) -> str:
             num_of_strings += 1
 
             SS = LL[next_str_idx].value
-            next_prefix = get_string_prefix(SS)
+            next_prefix = get_string_prefix(SS).lower()
 
             # If this is an f-string group but this substring is not prefixed
             # with 'f'...
@@ -541,7 +541,7 @@ def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
                 return TErr("StringMerger does NOT merge multiline strings.")
 
             num_of_strings += 1
-            prefix = get_string_prefix(leaf.value)
+            prefix = get_string_prefix(leaf.value).lower()
             if "r" in prefix:
                 return TErr("StringMerger does NOT merge raw strings.")
 
@@ -1036,7 +1036,7 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
         is_valid_index = is_valid_index_factory(LL)
         insert_str_child = insert_str_child_factory(LL[string_idx])
 
-        prefix = get_string_prefix(LL[string_idx].value)
+        prefix = get_string_prefix(LL[string_idx].value).lower()
 
         # We MAY choose to drop the 'f' prefix from substrings that don't
         # contain any f-expressions, but ONLY if the original f-string
@@ -1279,7 +1279,7 @@ def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
 
             yield from _fexpr_slices
 
-        is_fstring = "f" in get_string_prefix(string)
+        is_fstring = "f" in get_string_prefix(string).lower()
 
         def breaks_fstring_expression(i: Index) -> bool:
             """

diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py
@@ -446,6 +446,26 @@ def _legacy_listen_examples():
 assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
 assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
 
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    F"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    F"{balance: <{bal_len + 5}} "
+    F"<<{author.display_name}>>\n"
+)
+
+fstring = (
+    F"We have to remember to escape {braces}."
+    " Like {these}."
+    F" But not {this}."
+)
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = F"f-strings definitely make things more {difficult} than they need to be for {{black}}. But boy they sure are handy. The problem is that some lines will need to have the 'f' whereas others do not. This {line}, for example, needs one."
+
+x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."
+
+
 # output
 
 
@@ -1002,3 +1022,25 @@ def _legacy_listen_examples():
     " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
     " 'rykangaroo$', 'ykangaroo$']"
 )
+
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    f"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    f"{balance: <{bal_len + 5}} "
+    f"<<{author.display_name}>>\n"
+)
+
+fstring = f"We have to remember to escape {braces}. Like {{these}}. But not {this}."
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = (
+    f"f-strings definitely make things more {difficult} than they need to be for"
+    " {black}. But boy they sure are handy. The problem is that some lines will need"
+    f" to have the 'f' whereas others do not. This {line}, for example, needs one."
+)
+
+x = (
+    "This is a long string which contains an f-expr that should not split"
+    f" {{{[i for i in range(5)]}}}."
+)
diff --git a/tests/data/string_prefixes.py b/tests/data/string_prefixes.py
@@ -6,6 +6,17 @@
 r"hello"
 fR"hello"
 
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """
+
+
 # output
 
 
@@ -16,3 +27,13 @@
 b"hello"
 r"hello"
 fR"hello"
+
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """