Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle named escape sequences in format upgrades #542

Merged
merged 1 commit into from Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 25 additions & 14 deletions pyupgrade/_main.py
Expand Up @@ -31,8 +31,10 @@
from pyupgrade._data import Settings
from pyupgrade._data import Version
from pyupgrade._data import visit
from pyupgrade._string_helpers import curly_escape
from pyupgrade._string_helpers import is_ascii
from pyupgrade._string_helpers import is_codec
from pyupgrade._string_helpers import NAMED_UNICODE_RE
from pyupgrade._token_helpers import CLOSING
from pyupgrade._token_helpers import KEYWORDS
from pyupgrade._token_helpers import OPENING
Expand All @@ -47,21 +49,34 @@


def parse_format(s: str) -> Tuple[DotFormatPart, ...]:
    r"""Parse a ``str.format`` template while keeping ``\N{...}`` literal.

    The template is first split on named unicode escapes so that the braces
    of ``\N{NAME}`` are never handed to the stdlib format parser (which would
    treat them as a replacement field).  Each resulting part is a 4-tuple
    ``(literal_text, field_name, format_spec, conversion)`` where the literal
    text precedes the field.

    The empty string is not a special case: when nothing at all is parsed, a
    single literal part holding ``s`` is returned instead of an empty tuple.
    """
    ret: List[DotFormatPart] = []

    for part in NAMED_UNICODE_RE.split(s):
        if NAMED_UNICODE_RE.fullmatch(part):
            # A named escape is plain literal text.  It may only be glued
            # onto the previous part when that part is itself pure literal:
            # a part's literal text comes *before* its field, so merging into
            # a field-bearing part would move the escape in front of the
            # field and throw the field away.
            if ret and ret[-1][1:] == (None, None, None):
                ret[-1] = (ret[-1][0] + part, None, None, None)
            else:
                ret.append((part, None, None, None))
        else:
            first = True
            for tup in _stdlib_parse_format(part):
                if not first or not ret:
                    ret.append(tup)
                else:
                    # The first tuple of this chunk continues the literal
                    # text accumulated so far (a preceding named escape).
                    ret[-1] = (ret[-1][0] + tup[0], *tup[1:])
                first = False

    if not ret:
        ret.append((s, None, None, None))

    return tuple(ret)


def unparse_parsed_string(parsed: Sequence[DotFormatPart]) -> str:
def _convert_tup(tup: DotFormatPart) -> str:
ret, field_name, format_spec, conversion = tup
ret = ret.replace('{', '{{')
ret = ret.replace('}', '}}')
ret = curly_escape(ret)
if field_name is not None:
ret += '{' + field_name
if conversion:
Expand Down Expand Up @@ -786,10 +801,6 @@ def _fix_py36_plus(contents_text: str, *, min_version: Version) -> str:
return contents_text
for i, token in reversed_enumerate(tokens):
if token.offset in visitor.fstrings:
# TODO: handle \N escape sequences
if r'\N' in token.src:
continue

paren = i + 3
if tokens_to_src(tokens[i + 1:paren + 1]) != '.format(':
continue
Expand Down
12 changes: 3 additions & 9 deletions pyupgrade/_plugins/percent_format.py
Expand Up @@ -18,6 +18,7 @@
from pyupgrade._data import register
from pyupgrade._data import State
from pyupgrade._data import TokenFunc
from pyupgrade._string_helpers import curly_escape
from pyupgrade._token_helpers import KEYWORDS
from pyupgrade._token_helpers import remove_brace
from pyupgrade._token_helpers import victims
Expand Down Expand Up @@ -120,7 +121,8 @@ def _simplify_conversion_flag(flag: str) -> str:
def _percent_to_format(s: str) -> str:
def _handle_part(part: PercentFormat) -> str:
s, fmt = part
s = s.replace('{', '{{').replace('}', '}}')
s = curly_escape(s)

if fmt is None:
return s
else:
Expand Down Expand Up @@ -155,10 +157,6 @@ def _fix_percent_format_tuple(
*,
node_right: ast.Tuple,
) -> None:
# TODO: handle \N escape sequences
if r'\N' in tokens[i].src:
return

# TODO: this is overly timid
paren = i + 4
if tokens_to_src(tokens[i + 1:paren + 1]) != ' % (':
Expand All @@ -181,10 +179,6 @@ def _fix_percent_format_dict(
*,
node_right: ast.Dict,
) -> None:
# TODO: handle \N escape sequences
if r'\N' in tokens[i].src:
return

seen_keys: Set[str] = set()
keys = {}

Expand Down
13 changes: 13 additions & 0 deletions pyupgrade/_string_helpers.py
@@ -1,4 +1,5 @@
import codecs
import re
import string
import sys

Expand All @@ -8,6 +9,18 @@
def is_ascii(s: str) -> bool:
    """Return True when every character of *s* is in ``string.printable``.

    The empty string is accepted (there is no offending character).
    """
    for char in s:
        if char not in string.printable:
            return False
    return True

# Matches a ``\N{NAME}`` escape whose introducing backslash is not itself
# escaped.  Any run of escaped backslashes (``\\`` pairs) directly in front of
# the escape is part of the captured group, so ``split`` hands it back with
# the escape instead of discarding it.
NAMED_UNICODE_RE = re.compile(r'(?<!\\)((?:\\\\)*\\N\{[^}]+\})')


def curly_escape(s: str) -> str:
    r"""Double every ``{`` and ``}`` in *s* except inside ``\N{...}`` escapes.

    The result is suitable as literal text of a ``str.format`` template or an
    f-string: ordinary braces are escaped, while the braces that belong to a
    named unicode escape are left untouched.
    """
    # With a single capture group, ``split`` alternates between plain text
    # (even indices) and captured named escapes (odd indices).
    parts = NAMED_UNICODE_RE.split(s)
    return ''.join(
        part if i % 2 else part.replace('{', '{{').replace('}', '}}')
        for i, part in enumerate(parts)
    )


def is_codec(encoding: str, name: str) -> bool:
try:
Expand Down
15 changes: 13 additions & 2 deletions tests/features/format_literals_test.py
Expand Up @@ -16,6 +16,14 @@ def test_roundtrip_text(s):
assert unparse_parsed_string(parse_format(s)) == s


def test_parse_format_starts_with_named():
    # A real token never begins with a named escape (the source always opens
    # with a quote character), but the parser must still cope with it.
    template = r'\N{snowman} hi {0} hello'
    expected = (
        (r'\N{snowman} hi ', '0', '', None),
        (' hello', None, None, None),
    )
    assert parse_format(template) == expected


@pytest.mark.parametrize(
('s', 'expected'),
(
Expand Down Expand Up @@ -49,8 +57,6 @@ def test_intentionally_not_round_trip(s, expected):
"'{' '0}'.format(1)",
# comment looks like placeholder but is not!
'("{0}" # {1}\n"{2}").format(1, 2, 3)',
# TODO: this works by accident (extended escape treated as placeholder)
r'"\N{snowman} {}".format(1)',
# don't touch f-strings (these are wrong but don't make it worse)
'f"{0}".format(a)',
),
Expand Down Expand Up @@ -101,6 +107,11 @@ def test_format_literals_noop(s):
),
# parenthesized string literals
('("{0}").format(1)', '("{}").format(1)'),
pytest.param(
r'"\N{snowman} {0}".format(1)',
r'"\N{snowman} {}".format(1)',
id='named escape sequence',
),
),
)
def test_format_literals(s, expected):
Expand Down
7 changes: 5 additions & 2 deletions tests/features/fstrings_test.py
Expand Up @@ -26,8 +26,6 @@
'"{:{}}".format(x, y)',
'"{a[b]}".format(a=a)',
'"{a.a[b]}".format(a=a)',
# TODO: handle \N escape sequences
r'"\N{snowman} {}".format(a)',
# not enough placeholders / placeholders missing
'"{}{}".format(a)', '"{a}{b}".format(a=a)',
# backslashes and quotes cannot nest
Expand Down Expand Up @@ -58,6 +56,11 @@ def test_fix_fstrings_noop(s):
('"{}{{}}{}".format(escaped, y)', 'f"{escaped}{{}}{y}"'),
('"{}{b}{}".format(a, c, b=b)', 'f"{a}{b}{c}"'),
('"{}".format(0x0)', 'f"{0x0}"'),
pytest.param(
r'"\N{snowman} {}".format(a)',
r'f"\N{snowman} {a}"',
id='named escape sequences',
),
# TODO: poor man's f-strings?
# '"{foo}".format(**locals())'
),
Expand Down
12 changes: 9 additions & 3 deletions tests/features/percent_format_test.py
Expand Up @@ -178,9 +178,6 @@ def test_simplify_conversion_flag(s, expected):
'"%(and)s" % {"and": 2}',
# invalid string formats
'"%" % {}', '"%(hi)" % {}', '"%2" % {}',
# TODO: handle \N escape sequences
r'"%s \N{snowman}" % (a,)',
r'"%(foo)s \N{snowman}" % {"foo": 1}',
),
)
def test_percent_format_noop(s):
Expand Down Expand Up @@ -223,6 +220,15 @@ def test_percent_format_noop_if_bug_16806():
# dict
('"%(k)s" % {"k": "v"}', '"{k}".format(k="v")'),
('"%(to_list)s" % {"to_list": []}', '"{to_list}".format(to_list=[])'),
# \N escapes
(
r'"%s \N{snowman}" % (a,)',
r'"{} \N{snowman}".format(a)',
),
(
r'"%(foo)s \N{snowman}" % {"foo": 1}',
r'"{foo} \N{snowman}".format(foo=1)',
),
),
)
def test_percent_format(s, expected):
Expand Down