Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
436 views
1
# -*- coding: utf-8 -*-
2
"""
3
GSC Software: symbolic encoding tools
4
Author: Nick Becker and Pyeong Whan Cho
5
Department of Cognitive Science, Johns Hopkins University
6
Summer 2015
7
8
NOTE (7/19/15): These functions are not extensively tested!
9
"""
10
11
import sys
12
import numpy as np
13
from numbers import Number
14
15
TP_SYMBOL = '@'
16
ADD_SYMBOL = '+'
17
SCALE_SYMBOL = '*'
18
COMPRESS_SYMBOL = '$'
19
20
# create symbol representations
21
def encode_symbols(nsymbols, reptype='local', dp=0, ndim=None):
22
"""
23
Encodes symbols as vectors.
24
"""
25
if reptype == 'local':
26
sym_mat = np.eye(nsymbols)
27
else:
28
if ndim==None:
29
ndim = nsymbols
30
31
if isinstance(dp, Number):
32
# dp is a scalar that represents the expected pairwise similarity (dot product).
33
sym_mat = dot_products(nsymbols, ndim, dp)
34
else:
35
sym_mat = dot_products2(nsymbols, ndim, dp)
36
return sym_mat
37
38
def dot_products(nsymbols, ndim, s):
39
"""
40
Convert s to a matrix, then call the real dot product function...
41
"""
42
dp_mat = s * np.ones((nsymbols, nsymbols)) + (1-s) * np.eye(nsymbols, nsymbols)
43
sym_mat = dot_products2(nsymbols, ndim, dp_mat)
44
return sym_mat
45
46
def dot_products2(nsymbols, ndim, dp_mat):
47
"""
48
Given square matrix dpMatrix of dimension N-by-N, find N
49
dim-dimensional unit vectors whose pairwise dot products match
50
dpMatrix. Results are returned in the columns of M. itns is the
51
number of iterations of search required, and may be ignored.
52
53
Algorithm: Find a matrix M such that M'*M = dpMatrix. This is done
54
via gradient descent on a cost function that is the square of the
55
frobenius norm of (M'*M-dpMatrix).
56
57
NOTE: It has trouble finding more than about 16 vectors, possibly for
58
dumb numerical reasons (like stepsize and tolerance), which might be
59
fixable if necessary.
60
"""
61
if not (dp_mat.T == dp_mat).all():
62
# print error
63
sys.exit('dot_products2: dp_mat must be symmetric')
64
65
if (np.diag(dp_mat) != 1).any():
66
sys.exit('dot_products2: dp_mat must have all ones on the main diagonal')
67
68
sym_mat = np.random.uniform(size=ndim*nsymbols).reshape(ndim, nsymbols, order='F')
69
min_step = .1
70
tol = 1e-6
71
max_iter = 100000
72
converged = False
73
for iter_num in range(1, max_iter+1):
74
inc = sym_mat.dot(sym_mat.T.dot(sym_mat) - dp_mat)
75
step = min(min_step, .01/abs(inc).max())
76
sym_mat = sym_mat - step * inc
77
max_diff = abs(sym_mat.T.dot(sym_mat)-dp_mat).max()
78
if max_diff <= tol:
79
converged = True
80
break
81
82
if not converged:
83
print("Didn't converge after %d iterations" % max_iter)
84
85
return sym_mat
86
87
def add_symbols(names, colvecs, symdict):
88
"""
89
Adds symbols and corresponding vectors to a dictionary.
90
"""
91
for ii in range(len(names)):
92
symdict[names[ii]] = colvecs[:, ii]
93
return symdict
94
95
def interpretEqn(eqn, symdict, c34=False):
96
"""
97
Prepares a symbolic equation for interpretation using the recursive function
98
evaluate().
99
100
If c34 is set to 'True', then compress third and fourth vectors. This is a
101
temporary fix!!!
102
"""
103
eqnList = eqn.strip().split()
104
tempSymDict = symdict.copy()
105
106
# separate (1) open parens, (2) close parens, and (3) compress symbols from other items
107
newEqnList = []
108
for i in range(len(eqnList)):
109
currentSymbol1 = eqnList[i]
110
toInsert_1of3 = currentSymbol1.split('(')
111
for j in range(len(toInsert_1of3)-1):
112
toInsert_1of3.insert(j+1, '(')
113
114
for k in range(len(toInsert_1of3)):
115
currentSymbol2 = toInsert_1of3[k]
116
toInsert_2of3 = currentSymbol2.split(')')
117
for l in range(len(toInsert_2of3)-1):
118
toInsert_2of3.insert(l+1, ')')
119
120
for m in range(len(toInsert_2of3)):
121
currentSymbol3 = toInsert_2of3[m]
122
toInsert_3of3 = currentSymbol3.split(COMPRESS_SYMBOL)
123
for n in range(len(toInsert_3of3)-1):
124
toInsert_3of3.insert(n+1, COMPRESS_SYMBOL)
125
126
newEqnList += toInsert_3of3
127
128
eqnList = [x for x in newEqnList if x != '']
129
130
return evaluate(eqnList, tempSymDict)
131
132
133
def evaluate(symEqn, symdict, idNo=0):
134
"""
135
Recursive function to perform symbolc computation.
136
"""
137
138
if '(' not in symEqn:
139
140
# evaluate all scalars
141
while SCALE_SYMBOL in symEqn:
142
index = symEqn.index(SCALE_SYMBOL)
143
if index-1 < 0 or index+1 > len(symEqn):
144
sys.exit('Error: missing operand')
145
else:
146
newName = 'scaled' + str(idNo); idNo += 1
147
symdict[newName] = int(symEqn[index-1]) * symdict[symEqn[index+1]]
148
del symEqn[index-1:index+2]
149
symEqn.insert(index-1, newName)
150
151
# evaluate all TPs
152
while TP_SYMBOL in symEqn:
153
index = symEqn.index(TP_SYMBOL)
154
if index-1 < 0 or index+1 > len(symEqn):
155
sys.exit('Error: missing operand')
156
else:
157
newName = 'prod' + str(idNo); idNo += 1
158
symdict[newName] = np.tensordot(symdict[symEqn[index-1]], symdict[symEqn[index+1]], axes=0)
159
del symEqn[index-1:index+2]
160
symEqn.insert(index-1, newName)
161
162
# evaluate all sums
163
while ADD_SYMBOL in symEqn:
164
index = symEqn.index(ADD_SYMBOL)
165
if index-1 < 0 or index+1 > len(symEqn):
166
sys.exit('Error: missing operand')
167
else:
168
newName = 'sum' + str(idNo); idNo += 1
169
symdict[newName] = (symdict[symEqn[index-1]] + symdict[symEqn[index+1]])
170
del symEqn[index-1:index+2]
171
symEqn.insert(index-1, newName)
172
173
return symdict[symEqn[0]]
174
175
else:
176
177
start = symEqn.index('(')
178
end = getClosingParenIndex(symEqn, start)
179
180
if COMPRESS_SYMBOL in symEqn:
181
cSymIndex = symEqn.index(COMPRESS_SYMBOL)
182
compressIndices = list(symEqn[cSymIndex+1])
183
toCompress = evaluate(symEqn[start+1:end-1], symdict, idNo)
184
185
nDim = len(toCompress.shape)
186
187
# create string of length nDim for einsum function
188
startIndex = 'a' # change to 'i' to be more conventional
189
einSumString = []
190
for i in range(nDim):
191
einSumString.append(chr(ord(startIndex) + i))
192
193
# now set all indices in compressIndices to the same letter...
194
compressIndices = list(map(int, compressIndices))
195
setIndexTo = einSumString[compressIndices[0]]
196
for i in range(len(compressIndices)):
197
einSumString[compressIndices[i]-1] = setIndexTo
198
199
einSumString = ''.join(einSumString)
200
201
newName = 'compress' + str(idNo); idNo += 1
202
symdict[newName] = np.einsum(einSumString, toCompress)
203
204
del symEqn[cSymIndex:end]
205
symEqn.insert(cSymIndex, newName)
206
207
else:
208
newName = 'paren' + str(idNo); idNo += 1
209
symdict[newName] = evaluate(symEqn[start+1:end-1], symdict, idNo)
210
211
del symEqn[start:end]
212
symEqn.insert(start, newName)
213
214
return evaluate(symEqn, symdict, idNo)
215
216
def getClosingParenIndex(stringExpr, openParenIndex):
217
"""
218
Helper function to get index of end paren corresponding to openParenIndex.
219
"""
220
balance = 1
221
end = openParenIndex + 1
222
while end < len(stringExpr) and balance != 0:
223
if stringExpr[end] == '(':
224
balance += 1
225
elif stringExpr[end] == ')':
226
balance -= 1
227
end += 1
228
229
return end
230
231