-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
test_rte_classify.py
94 lines (81 loc) · 2.61 KB
/
test_rte_classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import pytest
from nltk import config_megam
from nltk.classify.rte_classify import RTEFeatureExtractor, rte_classifier, rte_features
from nltk.corpus import rte as rte_corpus
# Expected feature dump for the feature-extraction test: one "name => value"
# row per feature, seven rows per text/hypothesis pair, six pairs in total.
# Each feature name is left-justified in a 15-character column so that every
# row matches the f"{key:<15} => {value}" format the test builds its own
# output with. Blank lines only separate the pairs visually; the test drops
# empty rows before comparing.
# NOTE: the identifier is misspelled ("extration"), but it is kept unchanged
# because other code in this module refers to it by this name.
expected_from_rte_feature_extration = """
alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 3

alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 2
word_overlap    => 1

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 1
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 6
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 4
word_overlap    => 0

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 1
"""
class TestRTEClassifier:
    """Tests for the RTE feature extraction and classifier training helpers."""

    def test_rte_feature_extraction(self):
        """Compare ``rte_features`` output for six RTE1 pairs with the recorded dump."""
        pairs = rte_corpus.pairs(["rte1_dev.xml"])[:6]
        test_output = []
        for pair in pairs:
            # Extract the features once per pair rather than once per key.
            features = rte_features(pair)
            test_output.extend(
                f"{key:<15} => {features[key]}" for key in sorted(features)
            )
        # Drop empty rows so blank separator lines in the expected text
        # do not take part in the comparison.
        expected_output = [
            line
            for line in expected_from_rte_feature_extration.strip().split("\n")
            if line
        ]
        assert test_output == expected_output

    def test_feature_extractor_object(self):
        """Check the word sets exposed by ``RTEFeatureExtractor`` for one RTE3 pair."""
        rtepair = rte_corpus.pairs(["rte3_dev.xml"])[33]
        extractor = RTEFeatureExtractor(rtepair)
        assert extractor.hyp_words == {"member", "China", "SCO."}
        assert extractor.overlap("word") == set()
        assert extractor.overlap("ne") == {"China"}
        assert extractor.hyp_extra("word") == {"member"}

    def test_rte_classification_without_megam(self):
        """Smoke-test classifier training with the "IIS" and "GIS" algorithms.

        The test passes as long as training completes without raising; the
        returned classifiers are not inspected.
        """
        # Use a sample size for unit testing, since we
        # don't need to fully train these classifiers
        for algorithm in ("IIS", "GIS"):
            rte_classifier(algorithm, sample_N=100)

    def test_rte_classification_with_megam(self):
        """Smoke-test training with "megam"; skipped when ``config_megam`` fails."""
        try:
            config_megam()
        except (LookupError, AttributeError):
            pytest.skip("Skipping tests with dependencies on MEGAM")
        rte_classifier("megam", sample_N=100)