psf · JelleZijlstra · Jan 22, 2023 · Jan 30, 2022 · Jan 31, 2022 · Mar 13, 2022
diff --git a/CHANGES.md b/CHANGES.md
@@ -12,7 +12,7 @@
 
 ### Preview style
 
-<!-- Changes that affect Black's preview style -->
+- Format hex codes in Unicode escape sequences in string literals (#2916)
 
 - Code cell separators `#%%` are now standardised to `# %%` (#2919)
 - Avoid magic-trailing-comma in single-element subscripts (#2942)

diff --git a/src/black/linegen.py b/src/black/linegen.py
@@ -24,8 +24,13 @@
 from black.lines import can_omit_invisible_parens, can_be_split, append_leaves
 from black.comments import generate_comments, list_comments, FMT_OFF
 from black.numerics import normalize_numeric_literal
-from black.strings import get_string_prefix, fix_docstring
-from black.strings import normalize_string_prefix, normalize_string_quotes
+from black.strings import (
+    get_string_prefix,
+    fix_docstring,
+    normalize_string_prefix,
+    normalize_string_quotes,
+    normalize_unicode_escape_sequences,
+)
 from black.trans import Transformer, CannotTransform, StringMerger, StringSplitter
 from black.trans import StringParenWrapper, StringParenStripper, hug_power_op
 from black.mode import Mode, Feature, Preview
@@ -260,6 +265,9 @@ def visit_factor(self, node: Node) -> Iterator[Line]:
         yield from self.visit_default(node)
 
     def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
+        if Preview.hex_codes_in_unicode_sequences in self.mode:
+            normalize_unicode_escape_sequences(leaf)
+
         if is_docstring(leaf) and "\\\n" not in leaf.value:
             # We're ignoring docstrings with backslash newline escapes because changing
             # indentation of those changes the AST representation of the code.

diff --git a/src/black/mode.py b/src/black/mode.py
@@ -127,6 +127,7 @@ class Preview(Enum):
     """Individual preview style features."""
 
     string_processing = auto()
+    hex_codes_in_unicode_sequences = auto()
     one_element_subscript = auto()
 
 

diff --git a/src/black/strings.py b/src/black/strings.py
@@ -5,7 +5,9 @@
 import re
 import sys
 from functools import lru_cache
-from typing import List, Pattern
+from typing import List, Pattern, Match
+
+from blib2to3.pytree import Leaf
 
 if sys.version_info < (3, 8):
     from typing_extensions import Final
@@ -18,6 +20,15 @@
     r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL
 )
 FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")
+UNICODE_RE: Final = re.compile(
+    r"(\\+)("  # Group 1: backslash run; group 2: escape body after it
+    r"(u([a-fA-F0-9]{4}))"  # Character with 16-bit hex value xxxx
+    r"|(U([a-fA-F0-9]{8}))"  # Character with 32-bit hex value xxxxxxxx
+    r"|(x([a-fA-F0-9]{2}))"  # Character with hex value hh
+    r"|(N\{([a-zA-Z0-9 \-]+)\})"  # Character named name in the Unicode database
+    r")",
+    re.VERBOSE,  # Whitespace inside a character class stays significant
+)
 
 
 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
@@ -236,3 +247,31 @@ def normalize_string_quotes(s: str) -> str:
         return s  # Prefer double quotes
 
     return f"{prefix}{new_quote}{new_body}{new_quote}"
+
+
+def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
+    """Normalize the case of escape sequences in the string literal `leaf`.
+
+    Hex digits of the x, u and U escapes matched by UNICODE_RE are lowercased
+    and the names of named-character escapes are uppercased; `leaf.value` is
+    updated in place. Raw literals are never touched, and in bytes literals
+    only the x escape is rewritten, since the others are plain text there.
+    """
+    text = leaf.value
+    prefix = get_string_prefix(text).lower()
+    if "r" in prefix:
+        return  # Any raw prefix (r, R, rb, Rb, ...): backslashes are literal.
+    is_bytes = "b" in prefix
+
+    def replace(m: Match[str]) -> str:
+        back_slashes, body = m.group(1), m.group(2)
+        # An even run of backslashes is only escaped backslashes, so `body` is
+        # plain text; likewise `\u`, `\U` and `\N{}` mean nothing in bytes.
+        if len(back_slashes) % 2 == 0 or (is_bytes and body[0] != "x"):
+            return back_slashes + body
+        if body[0] == "N":
+            return back_slashes + body.upper()  # `N{` is unaffected by upper()
+        # `\u`, `\U` and `\x`: keep the marker letter, lowercase the digits.
+        return back_slashes + body[0] + body[1:].lower()
+
+    leaf.value = re.sub(UNICODE_RE, replace, text)
diff --git a/tests/data/format_unicode_escape_seq.py b/tests/data/format_unicode_escape_seq.py
@@ -0,0 +1,23 @@
+x = "\x1F"  # hex digits of a two-digit escape are lowercased
+x = "\\x1B"  # escaped backslash followed by plain text: left alone
+x = "\\\x1B"  # escaped backslash followed by a real escape: lowercased
+x = "\U0001F60E"  # eight-digit escape
+x = "\u0001F60E"  # four-digit escape followed by the plain text F60E
+x = r"\u0001F60E"  # raw string: left alone
+x = "don't format me"
+x = "\xA3"
+x = "\u2717"
+x = "\N{ox}\N{OX}"  # character names are uppercased
+
+# output
+
+x = "\x1f"  # hex digits of a two-digit escape are lowercased
+x = "\\x1B"  # escaped backslash followed by plain text: left alone
+x = "\\\x1b"  # escaped backslash followed by a real escape: lowercased
+x = "\U0001f60e"  # eight-digit escape
+x = "\u0001F60E"  # four-digit escape followed by the plain text F60E
+x = r"\u0001F60E"  # raw string: left alone
+x = "don't format me"
+x = "\xa3"
+x = "\u2717"
+x = "\N{OX}\N{OX}"  # character names are uppercased
diff --git a/tests/test_format.py b/tests/test_format.py
@@ -76,6 +76,7 @@
     "cantfit",
     "comments7",
     "comments8",
+    "format_unicode_escape_seq",
     "long_strings",
     "long_strings__edge_case",
     "long_strings__regression",