CoCalc -- rnn_precompile.py

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / data-science-ipython-notebooks / deep-learning / theano-tutorial / rnn_tutorial / rnn_precompile.py
¹³²⁹²⁸ views
License: OTHER
1
"""This file is only here to speed up the execution of notebooks.
2

3
It contains a subset of the code defined in simple_rnn.ipynb and
4
lstm_text.ipynb, in particular the code compiling Theano function.
5
Executing this script first will populate the cache of compiled C code,
6
which will make subsequent compilations faster.
7

8
The use case is to run this script in the background when a demo VM
9
such as the one for NVIDIA's qwikLABS, so that the compilation phase
10
started from the notebooks is faster.
11

12
"""
13
import numpy
14

15
import theano
16
import theano.tensor as T
17

18
from theano import config
19
from theano.tensor.nnet import categorical_crossentropy
20

21

22
floatX = theano.config.floatX
23

24

25
# simple_rnn.ipynb
26

27
class SimpleRNN(object):
28
    def __init__(self, input_dim, recurrent_dim):
29
        w_xh = numpy.random.normal(0, .01, (input_dim, recurrent_dim))
30
        w_hh = numpy.random.normal(0, .02, (recurrent_dim, recurrent_dim))
31
        self.w_xh = theano.shared(numpy.asarray(w_xh, dtype=floatX), name='w_xh')
32
        self.w_hh = theano.shared(numpy.asarray(w_hh, dtype=floatX), name='w_hh')
33
        self.b_h = theano.shared(numpy.zeros((recurrent_dim,), dtype=floatX), name='b_h')
34
        self.parameters = [self.w_xh, self.w_hh, self.b_h]
35

36
    def _step(self, input_t, previous):
37
        return T.tanh(T.dot(previous, self.w_hh) + input_t)
38

39
    def __call__(self, x):
40
        x_w_xh = T.dot(x, self.w_xh) + self.b_h
41
        result, updates = theano.scan(self._step,
42
                                      sequences=[x_w_xh],
43
                                      outputs_info=[T.zeros_like(self.b_h)])
44
        return result
45

46

47
w_ho_np = numpy.random.normal(0, .01, (15, 1))
48
w_ho = theano.shared(numpy.asarray(w_ho_np, dtype=floatX), name='w_ho')
49
b_o = theano.shared(numpy.zeros((1,), dtype=floatX), name='b_o')
50

51
x = T.matrix('x')
52
my_rnn = SimpleRNN(1, 15)
53
hidden = my_rnn(x)
54
prediction = T.dot(hidden, w_ho) + b_o
55
parameters = my_rnn.parameters + [w_ho, b_o]
56
l2 = sum((p**2).sum() for p in parameters)
57
mse = T.mean((prediction[:-1] - x[1:])**2)
58
cost = mse + .0001 * l2
59
gradient = T.grad(cost, wrt=parameters)
60

61
lr = .3
62
updates = [(par, par - lr * gra) for par, gra in zip(parameters, gradient)]
63
update_model = theano.function([x], cost, updates=updates)
64
get_cost = theano.function([x], mse)
65
predict = theano.function([x], prediction)
66
get_hidden = theano.function([x], hidden)
67
get_gradient = theano.function([x], gradient)
68

69
predict = theano.function([x], prediction)
70

71
# Generating sequences
72

73
x_t = T.vector()
74
h_p = T.vector()
75
preactivation = T.dot(x_t, my_rnn.w_xh) + my_rnn.b_h
76
h_t = my_rnn._step(preactivation, h_p)
77
o_t = T.dot(h_t, w_ho) + b_o
78

79
single_step = theano.function([x_t, h_p], [o_t, h_t])
80

81
# lstm_text.ipynb
82

83
def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005):
84
    if ndim_out is None:
85
        ndim_out = ndim_in
86
    W = rng.randn(ndim_in, ndim_out) * sd
87
    return numpy.asarray(W, dtype=config.floatX)
88

89

90
def index_dot(indices, w):
91
    return w[indices.flatten()]
92

93

94
class LstmLayer:
95

96
    def __init__(self, rng, input, mask, n_in, n_h):
97

98
        # Init params
99
        self.W_i = theano.shared(gauss_weight(rng, n_in, n_h), 'W_i', borrow=True)
100
        self.W_f = theano.shared(gauss_weight(rng, n_in, n_h), 'W_f', borrow=True)
101
        self.W_c = theano.shared(gauss_weight(rng, n_in, n_h), 'W_c', borrow=True)
102
        self.W_o = theano.shared(gauss_weight(rng, n_in, n_h), 'W_o', borrow=True)
103

104
        self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True)
105
        self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True)
106
        self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True)
107
        self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True)
108

109
        self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
110
                                 'b_i', borrow=True)
111
        self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
112
                                 'b_f', borrow=True)
113
        self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
114
                                 'b_c', borrow=True)
115
        self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
116
                                 'b_o', borrow=True)
117

118
        self.params = [self.W_i, self.W_f, self.W_c, self.W_o,
119
                       self.U_i, self.U_f, self.U_c, self.U_o,
120
                       self.b_i, self.b_f, self.b_c, self.b_o]
121

122
        outputs_info = [T.zeros((input.shape[1], n_h)),
123
                        T.zeros((input.shape[1], n_h))]
124

125
        rval, updates = theano.scan(self._step,
126
                                    sequences=[mask, input],
127
                                    outputs_info=outputs_info)
128

129
        # self.output is in the format (length, batchsize, n_h)
130
        self.output = rval[0]
131

132
    def _step(self, m_, x_, h_, c_):
133

134
        i_preact = (index_dot(x_, self.W_i) +
135
                    T.dot(h_, self.U_i) + self.b_i)
136
        i = T.nnet.sigmoid(i_preact)
137

138
        f_preact = (index_dot(x_, self.W_f) +
139
                    T.dot(h_, self.U_f) + self.b_f)
140
        f = T.nnet.sigmoid(f_preact)
141

142
        o_preact = (index_dot(x_, self.W_o) +
143
                    T.dot(h_, self.U_o) + self.b_o)
144
        o = T.nnet.sigmoid(o_preact)
145

146
        c_preact = (index_dot(x_, self.W_c) +
147
                    T.dot(h_, self.U_c) + self.b_c)
148
        c = T.tanh(c_preact)
149

150
        c = f * c_ + i * c
151
        c = m_[:, None] * c + (1. - m_)[:, None] * c_
152

153
        h = o * T.tanh(c)
154
        h = m_[:, None] * h + (1. - m_)[:, None] * h_
155

156
        return h, c
157

158

159
def sequence_categorical_crossentropy(prediction, targets, mask):
160
    prediction_flat = prediction.reshape(((prediction.shape[0] *
161
                                           prediction.shape[1]),
162
                                          prediction.shape[2]), ndim=2)
163
    targets_flat = targets.flatten()
164
    mask_flat = mask.flatten()
165
    ce = categorical_crossentropy(prediction_flat, targets_flat)
166
    return T.sum(ce * mask_flat)
167

168

169
class LogisticRegression(object):
170

171
    def __init__(self, rng, input, n_in, n_out):
172

173
        W = gauss_weight(rng, n_in, n_out)
174
        self.W = theano.shared(value=numpy.asarray(W, dtype=theano.config.floatX),
175
                               name='W', borrow=True)
176
        # initialize the biases b as a vector of n_out 0s
177
        self.b = theano.shared(value=numpy.zeros((n_out,),
178
                                                 dtype=theano.config.floatX),
179
                               name='b', borrow=True)
180

181
        # compute vector of class-membership probabilities in symbolic form
182
        energy = T.dot(input, self.W) + self.b
183
        energy_exp = T.exp(energy - T.max(energy, axis=2, keepdims=True))
184
        pmf = energy_exp / energy_exp.sum(axis=2, keepdims=True)
185
        self.p_y_given_x = pmf
186
        self.params = [self.W, self.b]
187

188
batch_size = 100
189
n_h = 50
190

191
# The Theano graph
192
# Set the random number generator' seeds for consistency
193
rng = numpy.random.RandomState(12345)
194

195
x = T.lmatrix('x')
196
mask = T.matrix('mask')
197

198
# Construct an LSTM layer and a logistic regression layer
199
recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h)
200
logreg_layer = LogisticRegression(rng=rng, input=recurrent_layer.output[:-1],
201
                                  n_in=n_h, n_out=111)
202

203
# define a cost variable to optimize
204
cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
205
                                         x[1:],
206
                                         mask[1:]) / batch_size
207

208
# create a list of all model parameters to be fit by gradient descent
209
params = logreg_layer.params + recurrent_layer.params
210

211
# create a list of gradients for all model parameters
212
grads = T.grad(cost, params)
213

214
learning_rate = 0.1
215
updates = [
216
    (param_i, param_i - learning_rate * grad_i)
217
    for param_i, grad_i in zip(params, grads)
218
]
219

220
update_model = theano.function([x, mask], cost, updates=updates)
221

222
evaluate_model = theano.function([x, mask], cost)
223

224
# Generating Sequences
225
x_t = T.iscalar()
226
h_p = T.vector()
227
c_p = T.vector()
228
h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
229
energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b
230

231
energy_exp = T.exp(energy - T.max(energy, axis=1, keepdims=True))
232

233
output = energy_exp / energy_exp.sum(axis=1, keepdims=True)
234
single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])
235

236
Product

Resources

Company