Regression fix: leave R prefixes capitalization alone (#2285)

`black.strings.get_string_prefix` used to lowercase the extracted prefix before returning it. This is wrong because 1) it ignores the fact we should leave R prefixes alone because of MagicPython, and 2) there is dedicated prefix casing handling code that fixes issue 1. `.lower` is too naive. This was originally fixed in 20.8b0, but was reintroduced since 21.4b0. I also added proper prefix normalization for docstrings by using the `black.strings.normalize_string_prefix` helper. Some more test strings were added to make sure strings with capitalized prefixes aren't treated differently (actually happened with my original patch, Jelle had to point it out to me).
psf · Jun 9, 2021 · 00e7e12 · 00e7e12
1 parent a9eab85
commit 00e7e12
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 8 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -12,6 +12,7 @@
 - Added `--required-version` option to require a specific version to be running (#2300)
 - Fix incorrect custom breakpoint indices when string group contains fake f-strings
   (#2311)
+- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
 
 ## 21.5b2
 

diff --git a/src/black/linegen.py b/src/black/linegen.py
@@ -226,8 +226,9 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
         if is_docstring(leaf) and "\\\n" not in leaf.value:
             # We're ignoring docstrings with backslash newline escapes because changing
             # indentation of those changes the AST representation of the code.
-            prefix = get_string_prefix(leaf.value)
-            docstring = leaf.value[len(prefix) :]  # Remove the prefix
+            docstring = normalize_string_prefix(leaf.value, self.remove_u_prefix)
+            prefix = get_string_prefix(docstring)
+            docstring = docstring[len(prefix) :]  # Remove the prefix
             quote_char = docstring[0]
             # A natural way to remove the outer quotes is to do:
             #   docstring = docstring.strip(quote_char)

diff --git a/src/black/strings.py b/src/black/strings.py
@@ -87,7 +87,7 @@ def get_string_prefix(string: str) -> str:
     prefix = ""
     prefix_idx = 0
     while string[prefix_idx] in STRING_PREFIX_CHARS:
-        prefix += string[prefix_idx].lower()
+        prefix += string[prefix_idx]
         prefix_idx += 1
 
     return prefix

diff --git a/src/black/trans.py b/src/black/trans.py
@@ -411,7 +411,7 @@ def make_naked(string: str, string_prefix: str) -> str:
             and is_valid_index(next_str_idx)
             and LL[next_str_idx].type == token.STRING
         ):
-            prefix = get_string_prefix(LL[next_str_idx].value)
+            prefix = get_string_prefix(LL[next_str_idx].value).lower()
             next_str_idx += 1
 
         # The next loop merges the string group. The final string will be
@@ -431,7 +431,7 @@ def make_naked(string: str, string_prefix: str) -> str:
             num_of_strings += 1
 
             SS = LL[next_str_idx].value
-            next_prefix = get_string_prefix(SS)
+            next_prefix = get_string_prefix(SS).lower()
 
             # If this is an f-string group but this substring is not prefixed
             # with 'f'...
@@ -541,7 +541,7 @@ def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
                 return TErr("StringMerger does NOT merge multiline strings.")
 
             num_of_strings += 1
-            prefix = get_string_prefix(leaf.value)
+            prefix = get_string_prefix(leaf.value).lower()
             if "r" in prefix:
                 return TErr("StringMerger does NOT merge raw strings.")
 
@@ -1036,7 +1036,7 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
         is_valid_index = is_valid_index_factory(LL)
         insert_str_child = insert_str_child_factory(LL[string_idx])
 
-        prefix = get_string_prefix(LL[string_idx].value)
+        prefix = get_string_prefix(LL[string_idx].value).lower()
 
         # We MAY choose to drop the 'f' prefix from substrings that don't
         # contain any f-expressions, but ONLY if the original f-string
@@ -1290,7 +1290,7 @@ def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
 
             yield from _fexpr_slices
 
-        is_fstring = "f" in get_string_prefix(string)
+        is_fstring = "f" in get_string_prefix(string).lower()
 
         def breaks_fstring_expression(i: Index) -> bool:
             """

diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py
@@ -495,6 +495,26 @@ async def foo(self):
     f"9. You now have a key to add to `{prefix}set api youtube api_key`"
 )
 
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    F"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    F"{balance: <{bal_len + 5}} "
+    F"<<{author.display_name}>>\n"
+)
+
+fstring = (
+    F"We have to remember to escape {braces}."
+    " Like {these}."
+    F" But not {this}."
+)
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = F"f-strings definitely make things more {difficult} than they need to be for {{black}}. But boy they sure are handy. The problem is that some lines will need to have the 'f' whereas others do not. This {line}, for example, needs one."
+
+x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."
+
+
 # output
 
 
@@ -1100,3 +1120,25 @@ async def foo(self):
     "8. No application restrictions are needed. Click Create at the bottom."
     f"9. You now have a key to add to `{prefix}set api youtube api_key`"
 )
+
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    f"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    f"{balance: <{bal_len + 5}} "
+    f"<<{author.display_name}>>\n"
+)
+
+fstring = f"We have to remember to escape {braces}. Like {{these}}. But not {this}."
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = (
+    f"f-strings definitely make things more {difficult} than they need to be for"
+    " {black}. But boy they sure are handy. The problem is that some lines will need"
+    f" to have the 'f' whereas others do not. This {line}, for example, needs one."
+)
+
+x = (
+    "This is a long string which contains an f-expr that should not split"
+    f" {{{[i for i in range(5)]}}}."
+)
diff --git a/tests/data/string_prefixes.py b/tests/data/string_prefixes.py
@@ -6,6 +6,17 @@
 r"hello"
 fR"hello"
 
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """
+
+
 # output
 
 
@@ -16,3 +27,13 @@
 b"hello"
 r"hello"
 fR"hello"
+
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """