Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
436 views
1
# -*- coding: utf-8 -*-
2
"""
3
GSC Software: Tree class
4
Author: Nick Becker
5
Department of Cognitive Science, Johns Hopkins University
6
Summer 2015
7
"""
8
9
class Tree():
10
"""
11
The last tree class you will ever need. Currently under development.
12
"""
13
14
# -------------------------------------------------------------------------
15
# Constructor and supporting functions
16
17
def __init__ (self, treeString):
18
"""
19
Create an internal representation of a tree by passing in a text version of it.
20
Formatting rules apply: trees must be passed to this method in the form
21
A (B A (D E))
22
23
In other words, parentheses enclose a node's children, and sister nodes are
24
separated by whitespace. Parentheses were chosen over brackets to avoid any
25
confusion when entering HNF trees, which contain many bracketed symbols.
26
27
Any symbols can be used in node labels EXCEPT parentheses, commas, and
28
percent signs (%).
29
30
Calling this method automatically sets the recursive-style filler-role bindings
31
for each node, as well as the span-style filler-role bindings for each node.
32
"""
33
self.treeList = self.treeStringToList(treeString)
34
35
self.recursiveFRbindings = []
36
self.setRecursiveRoles(self.treeList, 'r')
37
38
self.spanFRbindings = []
39
self.setSpanRoles(self.treeList)
40
41
def treeStringToList(self, inputString):
42
"""
43
Check formatting of the input string; if it is valid, prepare it for internal
44
operations.
45
"""
46
47
niceList = inputString.replace(',','').split()
48
49
# clean up input such that all parentheses are separate from symbols
50
i = 0
51
while i < len(niceList):
52
currentSymbol = niceList[i]
53
if len(currentSymbol) > 1:
54
if currentSymbol[0] == '(' or currentSymbol[0] == ')':
55
niceList[i] = currentSymbol[1:]
56
niceList.insert(i, currentSymbol[0])
57
i += 1
58
currentSymbol = niceList[i]
59
if currentSymbol[-1] == '(' or currentSymbol[-1] == ')':
60
niceList[i] = currentSymbol[:-1]
61
niceList.insert(i+1, currentSymbol[-1])
62
i -= 1
63
i += 1
64
65
# insert commas between sibling nodes, if need be.
66
# this is currently required for the algorithm used to set recursive roles
67
i = 0
68
while i < len(niceList) - 1:
69
currentSymbol = niceList[i]
70
nextSymbol = niceList[i+1]
71
if (currentSymbol != ')' and currentSymbol != '(' and nextSymbol != '(' and nextSymbol != ')') or \
72
(currentSymbol == ')' and nextSymbol != '(' and nextSymbol != ')'):
73
# insert a comma
74
niceList.insert(i+1, ',')
75
i += 1
76
i += 1
77
78
return niceList
79
80
def setRecursiveRoles(self, treeNodes, level):
81
"""
82
Recursive function to populate recursiveFRbindings.
83
"""
84
if len(treeNodes) > 0:
85
if treeNodes[0] == '(':
86
level = str(0) + level
87
elif treeNodes[0] == ',':
88
newLevel = str(int(level[0]) + 1) + level[1:]
89
level = newLevel
90
elif treeNodes[0] == ')':
91
level = level[1:]
92
else:
93
self.recursiveFRbindings.append((treeNodes[0], level))
94
95
self.setRecursiveRoles(treeNodes[1:], level)
96
97
else :
98
# all the recursive FR bindings are in place now, so let's reorder them
99
needSwapped = True
100
while needSwapped:
101
needSwapped = False
102
for i in range(len(self.recursiveFRbindings) - 1):
103
if len(self.recursiveFRbindings[i][1]) > len(self.recursiveFRbindings[i+1][1]):
104
# swap
105
temp = self.recursiveFRbindings[i]
106
self.recursiveFRbindings[i] = self.recursiveFRbindings[i+1]
107
self.recursiveFRbindings[i+1] = temp
108
needSwapped = True
109
110
def setSpanRoles(self, treeList):
111
"""
112
A dumb, iterative function to populate spanFRbindings. Once again, probably
113
bad programming practice to populate a variable like this instead of returning
114
a value.
115
116
A nice recursive algorithm to do this task is certainly possible and would
117
be far more elegant, but it turned out not to be as straighforward as I initially
118
thought... hence the inefficient iterative algorithm here. Luckily, this
119
shouldn't be a problem because most users will be looking at very small
120
trees.
121
"""
122
subTrees = self.findSubTrees(treeList)
123
124
# manually set spans of leaf nodes. this would be the base case in the recursive version.
125
spanBase = 0
126
spanDict = {}
127
for i in range(len(subTrees)):
128
if len(subTrees[i][1]) == 0:
129
spanDict[subTrees[i][0]] = str(spanBase) + str(spanBase+1)
130
spanBase += 1
131
132
# iteratively find the rest of the spans
133
done = False
134
while not done:
135
for i in range(len(subTrees)):
136
if len(subTrees[i][1]) != 0:
137
span = ''
138
invalidSpan = False
139
for child in subTrees[i][1]:
140
if child in spanDict:
141
childSpan = spanDict[child]
142
span = span + childSpan[0] + childSpan[-1]
143
else:
144
invalidSpan = True; # not all children were there
145
146
if not invalidSpan:
147
span = ''.join(sorted(set(span), key=span.index))
148
spanDict[subTrees[i][0]] = span
149
150
if len(spanDict) == len(subTrees):
151
done = True
152
153
tempBindings = len(spanDict) * [None]
154
for key in spanDict.keys():
155
splitKey = key.split('%')
156
tempBindings[int(splitKey[1])] = (splitKey[0], spanDict[key]);
157
158
self.spanFRbindings = tempBindings
159
160
def findSubTrees(self, treeList):
161
"""
162
Given a tree in list form, find the immediate children of each node.
163
"""
164
idNo = 0 # unique identifer for each node
165
for i in range(len(treeList) - 1):
166
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
167
treeList[i] += '%' + str(idNo) # since users might want to use periods in trees
168
idNo += 1
169
170
subTrees = []
171
for i in range(len(treeList) - 1):
172
if treeList[i] != '(' and treeList[i] != ',' and treeList[i] != ')':
173
currentChildren = []
174
# peek at next node; if it's '[', then this node has children and we need to collect them
175
j = i + 1
176
hasChildren = (treeList[j] == '(')
177
if hasChildren:
178
embedded = -1
179
j += 1
180
while j < len(treeList) and embedded != 0:
181
if treeList[j] == ')':
182
embedded += 1;
183
elif treeList[j] == '(':
184
embedded -= 1;
185
elif treeList[j] != ',' and embedded == -1:
186
currentChildren.append(treeList[j] )
187
j += 1
188
subTrees.append((treeList[i], currentChildren))
189
190
return subTrees
191
192
193
# -------------------------------------------------------------------------
194
# Setters and supporting functions
195
196
def getFRbindings(self):
197
return self.recursiveFRbindings
198
199
200
# -------------------------------------------------------------------------
201
# Pretty 'toString' methods
202
203
def recursiveFRtoString(self):
204
"""
205
Gets a pretty string for the recursive-style filler-role bindings.
206
"""
207
returnString = '{'
208
for i in range(len(self.recursiveFRbindings)):
209
if i != 0:
210
returnString += ' '
211
returnString += self.recursiveFRbindings[i][0] + '/' + self.recursiveFRbindings[i][1]
212
if i != (len(self.recursiveFRbindings) - 1) :
213
returnString += '; \n'
214
returnString += '}\n'
215
216
return returnString
217
218
def spanFRtoString(self):
219
"""
220
Gets a pretty string for the span-style filler-role bindings.
221
"""
222
returnString = '{'
223
for i in range(len(self.spanFRbindings)):
224
if i != 0:
225
returnString += ' '
226
returnString += self.spanFRbindings[i][0] + '/' + self.spanFRbindings[i][1]
227
if i != (len(self.spanFRbindings) - 1) :
228
returnString += '; \n'
229
returnString += '}\n'
230
231
return returnString
232
233