CoCalc -- symbolTools.py

⁴³⁶ views
1
# -*- coding: utf-8 -*-
2
"""
3
GSC Software: symbolic encoding tools
4
Author: Nick Becker and Pyeong Whan Cho
5
Department of Cognitive Science, Johns Hopkins University
6
Summer 2015
7

8
NOTE (7/19/15): These functions are not extensively tested!
9
"""
10

11
import sys
12
import numpy as np
13
from numbers import Number
14

15
TP_SYMBOL = '@'
16
ADD_SYMBOL = '+'
17
SCALE_SYMBOL = '*'
18
COMPRESS_SYMBOL = '$'
19

20
# create symbol representations
21
def encode_symbols(nsymbols, reptype='local', dp=0, ndim=None):
22
    """
23
    Encodes symbols as vectors.
24
    """
25
    if reptype == 'local':
26
        sym_mat = np.eye(nsymbols)
27
    else:
28
        if ndim==None:
29
            ndim = nsymbols
30

31
        if isinstance(dp, Number):
32
            # dp is a scalar that represents the expected pairwise similarity (dot product).
33
            sym_mat = dot_products(nsymbols, ndim, dp)
34
        else:
35
            sym_mat = dot_products2(nsymbols, ndim, dp)
36
    return sym_mat
37

38
def dot_products(nsymbols, ndim, s):
39
    """
40
    Convert s to a matrix, then call the real dot product function...
41
    """
42
    dp_mat = s * np.ones((nsymbols, nsymbols)) + (1-s) * np.eye(nsymbols, nsymbols)
43
    sym_mat = dot_products2(nsymbols, ndim, dp_mat)
44
    return sym_mat
45

46
def dot_products2(nsymbols, ndim, dp_mat):
47
    """
48
    Given square matrix dpMatrix of dimension N-by-N, find N
49
    dim-dimensional unit vectors whose pairwise dot products match
50
    dpMatrix. Results are returned in the columns of M. itns is the
51
    number of iterations of search required, and may be ignored.
52

53
    Algorithm: Find a matrix M such that M'*M = dpMatrix. This is done
54
    via gradient descent on a cost function that is the square of the
55
    frobenius norm of (M'*M-dpMatrix).
56

57
    NOTE: It has trouble finding more than about 16 vectors, possibly for
58
    dumb numerical reasons (like stepsize and tolerance), which might be
59
    fixable if necessary.
60
    """
61
    if not (dp_mat.T == dp_mat).all():
62
        # print error
63
        sys.exit('dot_products2: dp_mat must be symmetric')
64

65
    if (np.diag(dp_mat) != 1).any():
66
        sys.exit('dot_products2: dp_mat must have all ones on the main diagonal')
67

68
    sym_mat = np.random.uniform(size=ndim*nsymbols).reshape(ndim, nsymbols, order='F')
69
    min_step = .1
70
    tol = 1e-6
71
    max_iter = 100000
72
    converged = False
73
    for iter_num in range(1, max_iter+1):
74
        inc = sym_mat.dot(sym_mat.T.dot(sym_mat) - dp_mat)
75
        step = min(min_step, .01/abs(inc).max())
76
        sym_mat = sym_mat - step * inc
77
        max_diff = abs(sym_mat.T.dot(sym_mat)-dp_mat).max()
78
        if max_diff <= tol:
79
            converged = True
80
            break
81

82
    if not converged:
83
        print("Didn't converge after %d iterations" % max_iter)
84

85
    return sym_mat
86

87
def add_symbols(names, colvecs, symdict):
88
    """
89
    Adds symbols and corresponding vectors to a dictionary.
90
    """
91
    for ii in range(len(names)):
92
        symdict[names[ii]] = colvecs[:, ii]
93
    return symdict
94

95
def interpretEqn(eqn, symdict, c34=False):
96
    """
97
    Prepares a symbolic equation for interpretation using the recursive function
98
    evaluate().
99

100
    If c34 is set to 'True', then compress third and fourth vectors. This is a
101
    temporary fix!!!
102
    """
103
    eqnList = eqn.strip().split()
104
    tempSymDict = symdict.copy()
105

106
    # separate (1) open parens, (2) close parens, and (3) compress symbols from other items
107
    newEqnList = []
108
    for i in range(len(eqnList)):
109
        currentSymbol1 = eqnList[i]
110
        toInsert_1of3 = currentSymbol1.split('(')
111
        for j in range(len(toInsert_1of3)-1):
112
            toInsert_1of3.insert(j+1, '(')
113

114
        for k in range(len(toInsert_1of3)):
115
            currentSymbol2 = toInsert_1of3[k]
116
            toInsert_2of3 = currentSymbol2.split(')')
117
            for l in range(len(toInsert_2of3)-1):
118
                toInsert_2of3.insert(l+1, ')')
119

120
            for m in range(len(toInsert_2of3)):
121
                currentSymbol3 = toInsert_2of3[m]
122
                toInsert_3of3 = currentSymbol3.split(COMPRESS_SYMBOL)
123
                for n in range(len(toInsert_3of3)-1):
124
                    toInsert_3of3.insert(n+1, COMPRESS_SYMBOL)
125

126
                newEqnList += toInsert_3of3
127

128
    eqnList = [x for x in newEqnList if x != '']
129

130
    return evaluate(eqnList, tempSymDict)
131

132

133
def evaluate(symEqn, symdict, idNo=0):
134
    """
135
    Recursive function to perform symbolc computation.
136
    """
137

138
    if '(' not in symEqn:
139

140
        # evaluate all scalars
141
        while SCALE_SYMBOL in symEqn:
142
            index = symEqn.index(SCALE_SYMBOL)
143
            if index-1 < 0 or index+1 > len(symEqn):
144
                sys.exit('Error: missing operand')
145
            else:
146
                newName = 'scaled' + str(idNo); idNo += 1
147
                symdict[newName] = int(symEqn[index-1]) * symdict[symEqn[index+1]]
148
                del symEqn[index-1:index+2]
149
                symEqn.insert(index-1, newName)
150

151
        # evaluate all TPs
152
        while TP_SYMBOL in symEqn:
153
            index = symEqn.index(TP_SYMBOL)
154
            if index-1 < 0 or index+1 > len(symEqn):
155
                sys.exit('Error: missing operand')
156
            else:
157
                newName = 'prod' + str(idNo); idNo += 1
158
                symdict[newName] = np.tensordot(symdict[symEqn[index-1]], symdict[symEqn[index+1]], axes=0)
159
                del symEqn[index-1:index+2]
160
                symEqn.insert(index-1, newName)
161

162
        # evaluate all sums
163
        while ADD_SYMBOL in symEqn:
164
            index = symEqn.index(ADD_SYMBOL)
165
            if index-1 < 0 or index+1 > len(symEqn):
166
                sys.exit('Error: missing operand')
167
            else:
168
                newName = 'sum' + str(idNo); idNo += 1
169
                symdict[newName] = (symdict[symEqn[index-1]] + symdict[symEqn[index+1]])
170
                del symEqn[index-1:index+2]
171
                symEqn.insert(index-1, newName)
172

173
        return symdict[symEqn[0]]
174

175
    else:
176

177
        start = symEqn.index('(')
178
        end = getClosingParenIndex(symEqn, start)
179

180
        if COMPRESS_SYMBOL in symEqn:
181
            cSymIndex = symEqn.index(COMPRESS_SYMBOL)
182
            compressIndices = list(symEqn[cSymIndex+1])
183
            toCompress = evaluate(symEqn[start+1:end-1], symdict, idNo)
184

185
            nDim = len(toCompress.shape)
186

187
            # create string of length nDim for einsum function
188
            startIndex = 'a' # change to 'i' to be more conventional
189
            einSumString = []
190
            for i in range(nDim):
191
                einSumString.append(chr(ord(startIndex) + i))
192

193
            # now set all indices in compressIndices to the same letter...
194
            compressIndices = list(map(int, compressIndices))
195
            setIndexTo = einSumString[compressIndices[0]]
196
            for i in range(len(compressIndices)):
197
                einSumString[compressIndices[i]-1] = setIndexTo
198

199
            einSumString = ''.join(einSumString)
200

201
            newName = 'compress' + str(idNo); idNo += 1
202
            symdict[newName] = np.einsum(einSumString, toCompress)
203

204
            del symEqn[cSymIndex:end]
205
            symEqn.insert(cSymIndex, newName)
206

207
        else:
208
            newName = 'paren' + str(idNo); idNo += 1
209
            symdict[newName] = evaluate(symEqn[start+1:end-1], symdict, idNo)
210

211
            del symEqn[start:end]
212
            symEqn.insert(start, newName)
213

214
        return evaluate(symEqn, symdict, idNo)
215

216
def getClosingParenIndex(stringExpr, openParenIndex):
217
    """
218
    Helper function to get index of end paren corresponding to openParenIndex.
219
    """
220
    balance = 1
221
    end = openParenIndex + 1
222
    while end < len(stringExpr) and balance != 0:
223
        if stringExpr[end] == '(':
224
            balance += 1
225
        elif stringExpr[end] == ')':
226
            balance -= 1
227
        end += 1
228

229
    return end
230

231
Product

Resources

Company