"""This file is only here to speed up the execution of notebooks.

It contains a subset of the code defined in simple_rnn.ipynb and
lstm_text.ipynb, in particular the code compiling Theano functions.
Executing this script first will populate the cache of compiled C code,
which will make subsequent compilations faster.

The use case is to run this script in the background when a demo VM,
such as the one for NVIDIA's qwikLABS, is started, so that the
compilation phase triggered from the notebooks is faster.

"""
import numpy

import theano
import theano.tensor as T

from theano import config
from theano.tensor.nnet import categorical_crossentropy

floatX = theano.config.floatX


# simple_rnn.ipynb

class SimpleRNN(object):
    def __init__(self, input_dim, recurrent_dim):
        w_xh = numpy.random.normal(0, .01, (input_dim, recurrent_dim))
        w_hh = numpy.random.normal(0, .02, (recurrent_dim, recurrent_dim))
        self.w_xh = theano.shared(numpy.asarray(w_xh, dtype=floatX), name='w_xh')
        self.w_hh = theano.shared(numpy.asarray(w_hh, dtype=floatX), name='w_hh')
        self.b_h = theano.shared(numpy.zeros((recurrent_dim,), dtype=floatX), name='b_h')
        self.parameters = [self.w_xh, self.w_hh, self.b_h]

    def _step(self, input_t, previous):
        # One recurrence step: input_t already holds x_t . w_xh + b_h.
        return T.tanh(T.dot(previous, self.w_hh) + input_t)

    def __call__(self, x):
        # Precompute the input-to-hidden projection for the whole sequence,
        # then iterate the recurrence over it with scan.
        x_w_xh = T.dot(x, self.w_xh) + self.b_h
        result, updates = theano.scan(self._step,
                                      sequences=[x_w_xh],
                                      outputs_info=[T.zeros_like(self.b_h)])
        return result

# Readout layer on top of the recurrent hidden states.
w_ho_np = numpy.random.normal(0, .01, (15, 1))
w_ho = theano.shared(numpy.asarray(w_ho_np, dtype=floatX), name='w_ho')
b_o = theano.shared(numpy.zeros((1,), dtype=floatX), name='b_o')

x = T.matrix('x')
my_rnn = SimpleRNN(1, 15)
hidden = my_rnn(x)
prediction = T.dot(hidden, w_ho) + b_o
parameters = my_rnn.parameters + [w_ho, b_o]
l2 = sum((p**2).sum() for p in parameters)
# Next-step prediction: the target at time t is the input at time t + 1.
mse = T.mean((prediction[:-1] - x[1:])**2)
cost = mse + .0001 * l2
gradient = T.grad(cost, wrt=parameters)

lr = .3
updates = [(par, par - lr * gra) for par, gra in zip(parameters, gradient)]
update_model = theano.function([x], cost, updates=updates)
get_cost = theano.function([x], mse)
predict = theano.function([x], prediction)
get_hidden = theano.function([x], hidden)
get_gradient = theano.function([x], gradient)
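
# Illustrative usage sketch (added, not in the original notebooks): one
# gradient step on a short sine-wave sequence; the (timesteps, 1) shape
# matches input_dim = 1 above.
demo_seq = numpy.sin(numpy.arange(0, 10, .5)).astype(floatX)[:, None]
demo_cost = update_model(demo_seq)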


# Generating sequences

# Rebuild a single recurrence step as a standalone compiled function, so
# sequences can be generated one value at a time.
x_t = T.vector()
h_p = T.vector()
preactivation = T.dot(x_t, my_rnn.w_xh) + my_rnn.b_h
h_t = my_rnn._step(preactivation, h_p)
o_t = T.dot(h_t, w_ho) + b_o

single_step = theano.function([x_t, h_p], [o_t, h_t])
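
# Illustrative generation sketch (added, not in the original notebooks):
# start from a zero hidden state and feed each output back in as input.
demo_h = numpy.zeros(15, dtype=floatX)
demo_o = numpy.zeros(1, dtype=floatX)
demo_samples = []
for _ in range(10):
    demo_o, demo_h = single_step(demo_o, demo_h)
    demo_samples.append(demo_o[0])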


# lstm_text.ipynb

def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005):
    """Sample an (ndim_in, ndim_out) weight matrix from a centered Gaussian."""
    if ndim_out is None:
        ndim_out = ndim_in
    W = rng.randn(ndim_in, ndim_out) * sd
    return numpy.asarray(W, dtype=config.floatX)
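
# Added illustration (not in the original notebooks): with ndim_out omitted
# the sampled matrix is square.
assert gauss_weight(numpy.random.RandomState(0), 4).shape == (4, 4)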


def index_dot(indices, w):
    # Row lookup: each integer index selects the matching row of `w`, which
    # is equivalent to multiplying one-hot input vectors into `w`.
    return w[indices.flatten()]
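
# Added illustration (not in the original notebooks): with plain numpy
# arrays, index_dot reduces to direct row indexing.
_w_demo = numpy.arange(6.).reshape(3, 2)
assert (index_dot(numpy.array([2, 0]), _w_demo) == _w_demo[[2, 0]]).all()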


class LstmLayer:

    def __init__(self, rng, input, mask, n_in, n_h):

        # Init params
        self.W_i = theano.shared(gauss_weight(rng, n_in, n_h), 'W_i', borrow=True)
        self.W_f = theano.shared(gauss_weight(rng, n_in, n_h), 'W_f', borrow=True)
        self.W_c = theano.shared(gauss_weight(rng, n_in, n_h), 'W_c', borrow=True)
        self.W_o = theano.shared(gauss_weight(rng, n_in, n_h), 'W_o', borrow=True)

        self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True)
        self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True)
        self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True)
        self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True)

        self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_i', borrow=True)
        self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_f', borrow=True)
        self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_c', borrow=True)
        self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_o', borrow=True)

        self.params = [self.W_i, self.W_f, self.W_c, self.W_o,
                       self.U_i, self.U_f, self.U_c, self.U_o,
                       self.b_i, self.b_f, self.b_c, self.b_o]

        # Initial hidden and cell states: one row per sequence in the batch.
        outputs_info = [T.zeros((input.shape[1], n_h)),
                        T.zeros((input.shape[1], n_h))]

        rval, updates = theano.scan(self._step,
                                    sequences=[mask, input],
                                    outputs_info=outputs_info)

        # self.output is in the format (length, batch_size, n_h)
        self.output = rval[0]

    def _step(self, m_, x_, h_, c_):

        # Input gate
        i_preact = (index_dot(x_, self.W_i) +
                    T.dot(h_, self.U_i) + self.b_i)
        i = T.nnet.sigmoid(i_preact)

        # Forget gate
        f_preact = (index_dot(x_, self.W_f) +
                    T.dot(h_, self.U_f) + self.b_f)
        f = T.nnet.sigmoid(f_preact)

        # Output gate
        o_preact = (index_dot(x_, self.W_o) +
                    T.dot(h_, self.U_o) + self.b_o)
        o = T.nnet.sigmoid(o_preact)

        # Candidate cell state
        c_preact = (index_dot(x_, self.W_c) +
                    T.dot(h_, self.U_c) + self.b_c)
        c = T.tanh(c_preact)

        # Blend old and candidate cell states, then apply the mask so that
        # padded positions keep their previous state.
        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * T.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c
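
# For reference (added note): the step above implements the standard LSTM
# update, with the input projections x_t . W_* realized as row lookups
# (index_dot) because the inputs are integer indices:
#     i   = sigmoid(x_t W_i + h_{t-1} U_i + b_i)    input gate
#     f   = sigmoid(x_t W_f + h_{t-1} U_f + b_f)    forget gate
#     o   = sigmoid(x_t W_o + h_{t-1} U_o + b_o)    output gate
#     c_t = f * c_{t-1} + i * tanh(x_t W_c + h_{t-1} U_c + b_c)
#     h_t = o * tanh(c_t)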


def sequence_categorical_crossentropy(prediction, targets, mask):
    # Flatten (length, batch, n_classes) predictions to 2-D, compute the
    # per-position cross-entropy, and zero out masked (padding) positions.
    prediction_flat = prediction.reshape(((prediction.shape[0] *
                                           prediction.shape[1]),
                                          prediction.shape[2]), ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)


class LogisticRegression(object):

    def __init__(self, rng, input, n_in, n_out):

        W = gauss_weight(rng, n_in, n_out)
        self.W = theano.shared(value=numpy.asarray(W, dtype=theano.config.floatX),
                               name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # compute the class-membership probabilities in symbolic form, as a
        # numerically stable softmax over the class axis
        energy = T.dot(input, self.W) + self.b
        energy_exp = T.exp(energy - T.max(energy, axis=2, keepdims=True))
        pmf = energy_exp / energy_exp.sum(axis=2, keepdims=True)
        self.p_y_given_x = pmf
        self.params = [self.W, self.b]


batch_size = 100
n_h = 50

# The Theano graph
# Set the random number generator's seed for consistency
rng = numpy.random.RandomState(12345)

x = T.lmatrix('x')
mask = T.matrix('mask')

# Construct an LSTM layer and a logistic regression layer
recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h)
logreg_layer = LogisticRegression(rng=rng, input=recurrent_layer.output[:-1],
                                  n_in=n_h, n_out=111)

# define a cost variable to optimize
cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                         x[1:],
                                         mask[1:]) / batch_size

# create a list of all model parameters to be fit by gradient descent
params = logreg_layer.params + recurrent_layer.params

# create a list of gradients for all model parameters
grads = T.grad(cost, params)

learning_rate = 0.1
updates = [
    (param_i, param_i - learning_rate * grad_i)
    for param_i, grad_i in zip(params, grads)
]

update_model = theano.function([x, mask], cost, updates=updates)

evaluate_model = theano.function([x, mask], cost)
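
# Illustrative check (added, not in the original notebooks): evaluate the
# cost on a random dummy batch. `x` holds int64 indices in [0, 111) with
# shape (length, batch_size); `mask` marks the valid positions.
demo_x = numpy.random.randint(0, 111, size=(20, batch_size)).astype('int64')
demo_mask = numpy.ones((20, batch_size), dtype=floatX)
demo_cost = evaluate_model(demo_x, demo_mask)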


# Generating Sequences

# Rebuild one LSTM step (with the mask fixed to 1) plus a softmax readout as
# a standalone compiled function, for step-by-step generation.
x_t = T.iscalar()
h_p = T.vector()
c_p = T.vector()
h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b

energy_exp = T.exp(energy - T.max(energy, axis=1, keepdims=True))

output = energy_exp / energy_exp.sum(axis=1, keepdims=True)
single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])
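
# Illustrative sampling sketch (added, not in the original notebooks): greedy
# decoding with single_step. The step returns batch-shaped (1, n_h) states,
# hence the [0] when feeding them back.
demo_h = numpy.zeros(n_h, dtype=floatX)
demo_c = numpy.zeros(n_h, dtype=floatX)
demo_idx = 0
for _ in range(10):
    demo_p, demo_h_t, demo_c_t = single_step(demo_idx, demo_h, demo_c)
    demo_idx = int(demo_p[0].argmax())
    demo_h, demo_c = demo_h_t[0], demo_c_t[0]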