/
test_distance.py
123 lines (118 loc) · 5.32 KB
/
test_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from typing import Tuple
import pytest
from nltk.metrics.distance import edit_distance
class TestEditDistance:
@pytest.mark.parametrize(
"left,right,substitution_cost,expecteds",
[
# Allowing transpositions reduces the number of edits required.
# with transpositions:
# e.g. "abc" -T-> "cba" -D-> "ca": 2 steps
#
# without transpositions:
# e.g. "abc" -D-> "ab" -D-> "a" -I-> "ca": 3 steps
("abc", "ca", 1, (2, 3)),
("abc", "ca", 5, (2, 3)), # Doesn't *require* substitutions
# Note, a substition_cost of higher than 2 doesn't make much
# sense, as a deletion + insertion is identical, and always
# costs 2.
#
#
# Transpositions don't always reduce the number of edits required:
# with or without transpositions:
# e.g. "wants" -D-> "wats" -D-> "was" -I-> "wasp": 3 steps
("wants", "wasp", 1, (3, 3)),
("wants", "wasp", 5, (3, 3)), # Doesn't *require* substitutions
#
#
# Ought to have the same results with and without transpositions
# with or without transpositions:
# e.g. "rain" -S-> "sain" -S-> "shin" -I-> "shine": 3 steps
# (but cost 5 if substitution_cost=2)
("rain", "shine", 1, (3, 3)),
("rain", "shine", 2, (5, 5)), # Does *require* substitutions
#
#
# Several potentially interesting typos
# with transpositions:
# e.g. "acbdef" -T-> "abcdef": 1 step
#
# without transpositions:
# e.g. "acbdef" -D-> "abdef" -I-> "abcdef": 2 steps
("acbdef", "abcdef", 1, (1, 2)),
("acbdef", "abcdef", 2, (1, 2)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaguaeg" -T-> "languaeg" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaguaeg" -D-> "laguaeg" -I-> "languaeg" -D-> "languag" -I-> "language": 4 steps
("lnaguaeg", "language", 1, (2, 4)),
("lnaguaeg", "language", 2, (2, 4)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaugage" -T-> "lanugage" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaugage" -S-> "lnangage" -D-> "langage" -I-> "language": 3 steps
# (but one substitution, so a cost of 4 if substition_cost = 2)
("lnaugage", "language", 1, (2, 3)),
("lnaugage", "language", 2, (2, 4)),
# Does *require* substitutions if no transpositions
#
#
# with transpositions:
# e.g. "lngauage" -T-> "lnaguage" -T-> "language": 2 steps
# without transpositions:
# e.g. "lngauage" -I-> "lanaguage" -D-> "language": 2 steps
("lngauage", "language", 1, (2, 2)),
("lngauage", "language", 2, (2, 2)), # Doesn't *require* substitutions
#
#
# with or without transpositions:
# e.g. "wants" -S-> "sants" -S-> "swnts" -S-> "swits" -S-> "swims" -D-> "swim": 5 steps
#
# with substitution_cost=2 and transpositions:
# e.g. "wants" -T-> "santw" -D-> "sntw" -D-> "stw" -D-> "sw"
# -I-> "swi" -I-> "swim": 6 steps
#
# with substitution_cost=2 and no transpositions:
# e.g. "wants" -I-> "swants" -D-> "swant" -D-> "swan" -D-> "swa" -D-> "sw"
# -I-> "swi" -I-> "swim": 7 steps
("wants", "swim", 1, (5, 5)),
("wants", "swim", 2, (6, 7)),
#
#
# with or without transpositions:
# e.g. "kitten" -S-> "sitten" -s-> "sittin" -I-> "sitting": 3 steps
# (but cost 5 if substitution_cost=2)
("kitten", "sitting", 1, (3, 3)),
("kitten", "sitting", 2, (5, 5)),
],
)
def test_with_transpositions(
self, left: str, right: str, substitution_cost: int, expecteds: Tuple[int, int]
):
"""Test `edit_distance` between two strings, given some `substitution_cost`,
and whether transpositions are allowed.
Args:
left (str): First input string to `edit_distance`.
right (str): Second input string to `edit_distance`.
substitution_cost (int): The cost of a substitution action in `edit_distance`.
expecteds (Tuple[int, int]): A tuple of expected outputs, such that `expecteds[0]` is
the expected output with `transpositions=True`, and `expecteds[1]` is
the expected output with `transpositions=False`.
"""
# Test the input strings in both orderings
for s1, s2 in ((left, right), (right, left)):
# zip with [True, False] to get the transpositions value
for expected, transpositions in zip(expecteds, [True, False]):
predicted = edit_distance(
s1,
s2,
substitution_cost=substitution_cost,
transpositions=transpositions,
)
assert predicted == expected