/
treetransforms.doctest
154 lines (140 loc) · 4.74 KB
/
treetransforms.doctest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
.. Copyright (C) 2001-2021 NLTK Project
.. For license information, see LICENSE.TXT
------------------------------------------------
Unit tests for the tree transformation functions
------------------------------------------------
>>> from copy import deepcopy
>>> from nltk.tree import Tree, collapse_unary, chomsky_normal_form, un_chomsky_normal_form
>>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
>>> tree = Tree.fromstring(tree_string)
>>> print(tree)
(TOP
  (S
    (S
      (VP
        (VBN Turned)
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NNP Longman) (POS 's))
            (NN trading)
            (NN room)))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))

Make a copy of the original tree and collapse the subtrees with only one child.

>>> collapsedTree = deepcopy(tree)
>>> collapse_unary(collapsedTree)
>>> print(collapsedTree)
(TOP
  (S
    (S+VP
      (VBN Turned)
      (ADVP (RB loose))
      (PP
        (IN in)
        (NP
          (NP (NNP Shane) (NNP Longman) (POS 's))
          (NN trading)
          (NN room))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))
>>> collapsedTree2 = deepcopy(tree)
>>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
>>> print(collapsedTree2)
(TOP+S
  (S+VP
    (VBN Turned)
    (ADVP+RB loose)
    (PP
      (IN in)
      (NP
        (NP (NNP Shane) (NNP Longman) (POS 's))
        (NN trading)
        (NN room))))
  (, ,)
  (NP (DT the) (NN yuppie) (NNS dealers))
  (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
  (. .))

Convert the tree to Chomsky Normal Form, i.e., each subtree has either two
subtree children or a single leaf value. This conversion can be performed
using either left- or right-factoring.

>>> cnfTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(cnfTree, factor='left')
>>> print(cnfTree)
(TOP
  (S
    (S|<S+VP-,-NP-VP>
      (S|<S+VP-,-NP>
        (S|<S+VP-,>
          (S+VP
            (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
            (PP
              (IN in)
              (NP
                (NP|<NP-NN>
                  (NP
                    (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
                    (POS 's))
                  (NN trading))
                (NN room))))
          (, ,))
        (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
      (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
    (. .)))
>>> cnfTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(cnfTree, factor='right')
>>> print(cnfTree)
(TOP
  (S
    (S+VP
      (VBN Turned)
      (S+VP|<ADVP-PP>
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
            (NP|<NN-NN> (NN trading) (NN room))))))
    (S|<,-NP-VP-.>
      (, ,)
      (S|<NP-VP-.>
        (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
        (S|<VP-.>
          (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
          (. .))))))

Employ some Markov smoothing to make the artificial node labels a bit more
readable. See the treetransforms.py documentation for more details.

>>> markovTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
>>> print(markovTree)
(TOP
  (S^<TOP>
    (S+VP^<S>
      (VBN Turned)
      (S+VP|<ADVP-PP>^<S>
        (ADVP^<S+VP> (RB loose))
        (PP^<S+VP>
          (IN in)
          (NP^<PP>
            (NP^<NP>
              (NNP Shane)
              (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
            (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
    (S|<,-NP>^<TOP>
      (, ,)
      (S|<NP-VP>^<TOP>
        (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
        (S|<VP-.>^<TOP>
          (VP^<S>
            (AUX do)
            (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
          (. .))))))

Convert the transformed tree back to its original form.

>>> un_chomsky_normal_form(markovTree)
>>> tree == markovTree
True