CoCalc -- class-5.ipynb

/ class-docs / class-5_20-july / class-5.ipynb

⁴³⁶ views

Kernel: Python 3

# Argument structure

In [3]:

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Execute the shared helper script in this namespace. Later cells call
# encode_symbols, add_symbols and interpretEqn (and use `np`) without defining
# them in this notebook, so they presumably come from this script -- TODO confirm.
%run ../../code/symbolTools

In [4]:

# Start from an empty symbol table; every later cell reads from and writes to it.
symdict = {}

# Names of ALL basis features. They are encoded in a single encode_symbols()
# call so that every feature vector is guaranteed to have the same size.
basis_features = [
    'MASC', 'FEM',
    'A', 'O', 'S',
    'CAUS', 'VOL', 'ANIM', 'AFF', 'nonVOL', 'inANIM',
    'ROT', 'TRAN', 'TACT', 'PAST',
    'E', 'P', 'PERC',
]

# One encoding per basis feature, using the 'dist' representation type.
BF = encode_symbols(len(basis_features), reptype='dist')

# Register the (name, encoding) pairs in the symbol table.
symdict = add_symbols(basis_features, BF, symdict)

In [5]:

# Derived symbols, listed as (name, equation) pairs.
# ORDER MATTERS: each equation is interpreted against the current contents of
# symdict, so a symbol must be stored before any later equation refers to it.
for sym_name, sym_eqn in (
    # Individuals -- only one feature is used; other features are ignored.
    ('J', 'MASC'),
    ('K', 'FEM'),
    # Feature bundles and their bindings to the A / O / S symbols.
    ('AG', 'CAUS + VOL + ANIM'),
    ('PAT', 'AFF + nonVOL + inANIM'),
    ('YAO', 'AG @ A + PAT @ O'),
    ('YOS', 'PAT @ S'),
    ('YAS', 'AG @ S'),
    ('cocoa', 'inANIM'),
    ('EXP', 'AFF + PERC + ANIM'),
    ('STIM', 'PERC'),
    ('Y_perc', 'EXP @ A + STIM @ O'),
    ('Y_ES', 'EXP @ S'),
    # Lexical entries: <word> = <word>_sem @ <word>_syn.
    ('spun_sem', 'ROT + PAST'),
    ('spun_syn', 'YAO + YOS'),
    ('spun', 'spun_sem @ spun_syn'),
    ('swirled_sem', 'ROT + PAST'),
    ('swirled_syn', 'YAO + YOS'),
    ('swirled', 'swirled_sem @ swirled_syn'),
    ('slid_sem', 'TRAN + PAST'),
    ('slid_syn', 'YAO + YOS'),
    ('slid', 'slid_sem @ slid_syn'),
    ('stirred_sem', 'ROT + PAST'),
    ('stirred_syn', 'YAO + YAS'),
    ('stirred', 'stirred_sem @ stirred_syn'),
    ('felt_sem', 'TACT + PAST'),
    ('felt_syn', 'Y_perc'),
    ('felt', 'felt_sem @ felt_syn'),
):
    symdict[sym_name] = interpretEqn(sym_eqn, symdict)

In [6]:

# Sanity check done by hand: rather than going through the vectorized
# tensor-product results, contract the arrays directly with np.tensordot().
K = interpretEqn('K', symdict)
spun_sem = interpretEqn('ROT + PAST', symdict)
spun_syn = interpretEqn('AG @ A + PAT @ O + PAT @ S', symdict)
S = interpretEqn('S', symdict)

# spun_syn has two axes (see the printed shape).
print(spun_syn.shape)

# Sum over the second axis of spun_syn and the first (only) axis of S.
temp = np.tensordot(spun_syn, S, axes=(1, 0))

# Expectation: only the 'PAT @ S' term survives the contraction with S, so
# the result should equal PAT; the terms bound to A and O should cancel out.
# Compared with a floating-point tolerance rather than exact equality.
np.allclose(symdict['PAT'], temp)

Out[6]:

(18, 18)

True

In [7]:

# Now check a whole sentence: K spun (intransitive, so K is bound via S).
# symdict['spun'] is re-bound with the same equation used in the definition
# cell above, which keeps this cell runnable on its own once symdict holds
# 'spun_sem' and 'spun_syn'.
symdict['spun'] = interpretEqn('spun_sem @ spun_syn', symdict)
print('The representation space of [spun] has %d dimensions.' % symdict['spun'].ndim)
print('One from [spun_sem] and two from [spun_syn].')
symdict['spun_S'] = interpretEqn('spun @ S', symdict)  # Product only: adds an axis for S; no contraction yet.
symdict['K_spun_S'] = interpretEqn('K @ spun_S', symdict)
print('The representation space for [K * (spun * S)] has %d dimensions.' % symdict['K_spun_S'].ndim)
print('The 2nd, 3rd, and 4th dimensions come from the representation space of [spun] \n \
where the last dimension contains the syntax information (A, S, O).')
print('Thus, we can contract the representation of whole across the 4th and 5th dimensions \n \
because the 5th dimension has also the same information.')

# The 4th and 5th axes of K_spun_S carry the same kind of symbol.
# '$45(...)' presumably asks interpretEqn to contract the 4th and 5th axes
# (TODO confirm against symbolTools); the printed shape has three axes left,
# which is consistent with those two axes being removed.
print(interpretEqn('$45(K_spun_S)', symdict).shape)

# Prediction: the contracted result equals the semantic representation of
# 'K spun', i.e. FEM @ (spun_sem @ PAT), with spun_sem = ROT + PAST and
# PAT = AFF + nonVOL + inANIM as defined earlier in this notebook.

# NOTE(review): the factor 4 appears on both sides of the comparison below, so
# it only changes how np.allclose's absolute tolerance applies -- confirm the
# factor is intentional.
FEM_spun_sem_PAT = interpretEqn('4 * (FEM @ (spun_sem @ PAT))', symdict)
# Equivalent construction without interpretEqn (and without the factor 4), kept for reference:
# spun_sem_PAT = np.tensordot(spun_sem, symdict['PAT'], axes=0)
# FEM_spun_sem_PAT = np.tensordot(symdict['FEM'], spun_sem_PAT, axes=0)

# Exact-equality variant, kept for reference; np.allclose is used instead to tolerate rounding:
#(FEM_spun_sem_PAT == np.einsum('ijkll', K_spun_S)).all()
# einsum('ijkll', x) sums x[i, j, k, l, l] over l, i.e. it contracts the last two axes.
np.allclose(FEM_spun_sem_PAT, 4 * np.einsum('ijkll', symdict['K_spun_S']))

Out[7]:

The representation space of [spun] has 3 dimensions.
One from [spun_sem] and two from [spun_syn].
The representation space for [K * (spun * S)] has 5 dimensions.
The 2nd, 3rd, and 4th dimensions come from the representation space of [spun] 
 where the last dimension contains the syntax information (A, S, O).
Thus, we can contract the representation of whole across the 4th and 5th dimensions 
 because the 5th dimension has also the same information.
(18, 18, 18)

True

In [ ]:

Product

Resources

Company