Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle NFC/NFD strings that normalize to the same string. #10355

Merged
merged 14 commits into from
Oct 10, 2022
1 change: 1 addition & 0 deletions AUTHORS
Expand Up @@ -154,6 +154,7 @@ Ian Bicking
Ian Lesperance
Ilya Konstantinov
Ionuț Turturică
Itxaso Aizpurua
Iwan Briquemont
Jaap Broekhuizen
Jakob van Santen
Expand Down
1 change: 1 addition & 0 deletions changelog/3426.improvement.rst
@@ -0,0 +1 @@
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their utf-8 representation is expressed instead.
17 changes: 13 additions & 4 deletions src/_pytest/_io/saferepr.py
Expand Up @@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
information on exceptions raised during the call.
"""

def __init__(self, maxsize: Optional[int]) -> None:
def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
"""
:param maxsize:
If not None, will truncate the resulting repr to that specific size, using ellipsis
Expand All @@ -54,9 +54,13 @@ def __init__(self, maxsize: Optional[int]) -> None:
# truncation.
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
self.maxsize = maxsize
self.use_ascii = use_ascii

def repr(self, x: object) -> str:
try:
if self.use_ascii:
return ascii(x)

s = super().repr(x)
Zac-HD marked this conversation as resolved.
Show resolved Hide resolved
except (KeyboardInterrupt, SystemExit):
raise
Expand Down Expand Up @@ -94,7 +98,9 @@ def safeformat(obj: object) -> str:
DEFAULT_REPR_MAX_SIZE = 240


def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
def saferepr(
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
) -> str:
"""Return a size-limited safe repr-string for the given object.

Failing __repr__ functions of user instances will be represented
Expand All @@ -104,10 +110,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
This function is a wrapper around the Repr/reprlib functionality of the
stdlib.
"""
return SafeRepr(maxsize).repr(obj)

return SafeRepr(maxsize, use_ascii).repr(obj)


def saferepr_unlimited(obj: object) -> str:
def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
"""Return an unlimited-size safe repr-string for the given object.

As with saferepr, failing __repr__ functions of user instances
Expand All @@ -119,6 +126,8 @@ def saferepr_unlimited(obj: object) -> str:
when maxsize=None, but that might affect some other code.
"""
try:
if use_ascii:
return ascii(obj)
return repr(obj)
except Exception as exc:
return _format_repr_exception(exc, obj)
Expand Down
23 changes: 18 additions & 5 deletions src/_pytest/assertion/util.py
Expand Up @@ -10,6 +10,7 @@
from typing import Mapping
from typing import Optional
from typing import Sequence
from unicodedata import normalize

import _pytest._code
from _pytest import outcomes
Expand Down Expand Up @@ -156,20 +157,32 @@ def has_default_eq(
return True


def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
def assertrepr_compare(
config, op: str, left: Any, right: Any, use_ascii: bool = False
) -> Optional[List[str]]:
"""Return specialised explanations for some operators/operands."""
verbose = config.getoption("verbose")

# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
# See issue #3426.
use_ascii = (
Zac-HD marked this conversation as resolved.
Show resolved Hide resolved
isinstance(left, str)
and isinstance(right, str)
and normalize("NFD", left) == normalize("NFD", right)
)

if verbose > 1:
left_repr = saferepr_unlimited(left)
right_repr = saferepr_unlimited(right)
left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
else:
# XXX: "15 chars indentation" is wrong
# ("E AssertionError: assert "); should use term width.
maxsize = (
80 - 15 - len(op) - 2
) // 2 # 15 chars indentation, 1 space around op
left_repr = saferepr(left, maxsize=maxsize)
right_repr = saferepr(right, maxsize=maxsize)

left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)

summary = f"{left_repr} {op} {right_repr}"

Expand Down
18 changes: 18 additions & 0 deletions testing/test_assertion.py
Expand Up @@ -776,6 +776,24 @@ def test_mojibake(self) -> None:
msg = "\n".join(expl)
assert msg

def test_nfc_nfd_same_string(self) -> None:
# issue 3426
left = "hyv\xe4"
right = "hyva\u0308"
expl = callequal(left, right)
assert expl == [
r"'hyv\xe4' == 'hyva\u0308'",
f"- {str(right)}",
f"+ {str(left)}",
]

expl = callequal(left, right, verbose=2)
assert expl == [
r"'hyv\xe4' == 'hyva\u0308'",
f"- {str(right)}",
f"+ {str(left)}",
]


class TestAssert_reprcompare_dataclass:
def test_dataclasses(self, pytester: Pytester) -> None:
Expand Down