cocalc-examples/data-science-ipython-notebooks/deep-learning/theano-tutorial/rnn_tutorial/rnn_precompile.py
"""This file is only here to speed up the execution of notebooks.12It contains a subset of the code defined in simple_rnn.ipynb and3lstm_text.ipynb, in particular the code compiling Theano function.4Executing this script first will populate the cache of compiled C code,5which will make subsequent compilations faster.67The use case is to run this script in the background when a demo VM8such as the one for NVIDIA's qwikLABS, so that the compilation phase9started from the notebooks is faster.1011"""12import numpy1314import theano15import theano.tensor as T1617from theano import config18from theano.tensor.nnet import categorical_crossentropy192021floatX = theano.config.floatX222324# simple_rnn.ipynb2526class SimpleRNN(object):27def __init__(self, input_dim, recurrent_dim):28w_xh = numpy.random.normal(0, .01, (input_dim, recurrent_dim))29w_hh = numpy.random.normal(0, .02, (recurrent_dim, recurrent_dim))30self.w_xh = theano.shared(numpy.asarray(w_xh, dtype=floatX), name='w_xh')31self.w_hh = theano.shared(numpy.asarray(w_hh, dtype=floatX), name='w_hh')32self.b_h = theano.shared(numpy.zeros((recurrent_dim,), dtype=floatX), name='b_h')33self.parameters = [self.w_xh, self.w_hh, self.b_h]3435def _step(self, input_t, previous):36return T.tanh(T.dot(previous, self.w_hh) + input_t)3738def __call__(self, x):39x_w_xh = T.dot(x, self.w_xh) + self.b_h40result, updates = theano.scan(self._step,41sequences=[x_w_xh],42outputs_info=[T.zeros_like(self.b_h)])43return result444546w_ho_np = numpy.random.normal(0, .01, (15, 1))47w_ho = theano.shared(numpy.asarray(w_ho_np, dtype=floatX), name='w_ho')48b_o = theano.shared(numpy.zeros((1,), dtype=floatX), name='b_o')4950x = T.matrix('x')51my_rnn = SimpleRNN(1, 15)52hidden = my_rnn(x)53prediction = T.dot(hidden, w_ho) + b_o54parameters = my_rnn.parameters + [w_ho, b_o]55l2 = sum((p**2).sum() for p in parameters)56mse = T.mean((prediction[:-1] - x[1:])**2)57cost = mse + .0001 * l258gradient = T.grad(cost, wrt=parameters)5960lr = .361updates = [(par, par - lr * gra) for par, gra in zip(parameters, gradient)]62update_model = theano.function([x], cost, updates=updates)63get_cost = theano.function([x], mse)64predict = theano.function([x], prediction)65get_hidden = theano.function([x], hidden)66get_gradient = theano.function([x], gradient)6768predict = theano.function([x], prediction)6970# Generating sequences7172x_t = T.vector()73h_p = T.vector()74preactivation = T.dot(x_t, my_rnn.w_xh) + my_rnn.b_h75h_t = my_rnn._step(preactivation, h_p)76o_t = T.dot(h_t, w_ho) + b_o7778single_step = theano.function([x_t, h_p], [o_t, h_t])7980# lstm_text.ipynb8182def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005):83if ndim_out is None:84ndim_out = ndim_in85W = rng.randn(ndim_in, ndim_out) * sd86return numpy.asarray(W, dtype=config.floatX)878889def index_dot(indices, w):90return w[indices.flatten()]919293class LstmLayer:9495def __init__(self, rng, input, mask, n_in, n_h):9697# Init params98self.W_i = theano.shared(gauss_weight(rng, n_in, n_h), 'W_i', borrow=True)99self.W_f = theano.shared(gauss_weight(rng, n_in, n_h), 'W_f', borrow=True)100self.W_c = theano.shared(gauss_weight(rng, n_in, n_h), 'W_c', borrow=True)101self.W_o = theano.shared(gauss_weight(rng, n_in, n_h), 'W_o', borrow=True)102103self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True)104self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True)105self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True)106self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True)107108self.b_i = 
# lstm_text.ipynb

def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005):
    if ndim_out is None:
        ndim_out = ndim_in
    W = rng.randn(ndim_in, ndim_out) * sd
    return numpy.asarray(W, dtype=config.floatX)


def index_dot(indices, w):
    return w[indices.flatten()]


class LstmLayer:

    def __init__(self, rng, input, mask, n_in, n_h):

        # Init params
        self.W_i = theano.shared(gauss_weight(rng, n_in, n_h), 'W_i', borrow=True)
        self.W_f = theano.shared(gauss_weight(rng, n_in, n_h), 'W_f', borrow=True)
        self.W_c = theano.shared(gauss_weight(rng, n_in, n_h), 'W_c', borrow=True)
        self.W_o = theano.shared(gauss_weight(rng, n_in, n_h), 'W_o', borrow=True)

        self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True)
        self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True)
        self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True)
        self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True)

        self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_i', borrow=True)
        self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_f', borrow=True)
        self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_c', borrow=True)
        self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_o', borrow=True)

        self.params = [self.W_i, self.W_f, self.W_c, self.W_o,
                       self.U_i, self.U_f, self.U_c, self.U_o,
                       self.b_i, self.b_f, self.b_c, self.b_o]

        outputs_info = [T.zeros((input.shape[1], n_h)),
                        T.zeros((input.shape[1], n_h))]

        rval, updates = theano.scan(self._step,
                                    sequences=[mask, input],
                                    outputs_info=outputs_info)

        # self.output is in the format (length, batchsize, n_h)
        self.output = rval[0]

    def _step(self, m_, x_, h_, c_):

        i_preact = (index_dot(x_, self.W_i) +
                    T.dot(h_, self.U_i) + self.b_i)
        i = T.nnet.sigmoid(i_preact)

        f_preact = (index_dot(x_, self.W_f) +
                    T.dot(h_, self.U_f) + self.b_f)
        f = T.nnet.sigmoid(f_preact)

        o_preact = (index_dot(x_, self.W_o) +
                    T.dot(h_, self.U_o) + self.b_o)
        o = T.nnet.sigmoid(o_preact)

        c_preact = (index_dot(x_, self.W_c) +
                    T.dot(h_, self.U_c) + self.b_c)
        c = T.tanh(c_preact)

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * T.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c


def sequence_categorical_crossentropy(prediction, targets, mask):
    prediction_flat = prediction.reshape(((prediction.shape[0] *
                                           prediction.shape[1]),
                                          prediction.shape[2]), ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)


class LogisticRegression(object):

    def __init__(self, rng, input, n_in, n_out):

        W = gauss_weight(rng, n_in, n_out)
        self.W = theano.shared(value=numpy.asarray(W, dtype=theano.config.floatX),
                               name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # compute vector of class-membership probabilities in symbolic form
        energy = T.dot(input, self.W) + self.b
        energy_exp = T.exp(energy - T.max(energy, axis=2, keepdims=True))
        pmf = energy_exp / energy_exp.sum(axis=2, keepdims=True)
        self.p_y_given_x = pmf
        self.params = [self.W, self.b]


batch_size = 100
n_h = 50

# The Theano graph
# Set the random number generator's seed for consistency
rng = numpy.random.RandomState(12345)

x = T.lmatrix('x')
mask = T.matrix('mask')

# Construct an LSTM layer and a logistic regression layer
recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h)
logreg_layer = LogisticRegression(rng=rng, input=recurrent_layer.output[:-1],
                                  n_in=n_h, n_out=111)

# define a cost variable to optimize
cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                         x[1:],
                                         mask[1:]) / batch_size

# create a list of all model parameters to be fit by gradient descent
params = logreg_layer.params + recurrent_layer.params

# create a list of gradients for all model parameters
grads = T.grad(cost, params)

learning_rate = 0.1
updates = [
    (param_i, param_i - learning_rate * grad_i)
    for param_i, grad_i in zip(params, grads)
]

update_model = theano.function([x, mask], cost, updates=updates)

evaluate_model = theano.function([x, mask], cost)
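
# --- Hedged usage sketch (not part of the original script) ---
# An assumed example of driving the compiled LSTM functions: `x` expects
# integer character indices of shape (length, batch_size) and `mask` a
# float matrix of the same shape. The helper `_demo_lstm_batch` and its
# random data are illustrative only; it is defined but never called.
def _demo_lstm_batch(length=20):
    demo_rng = numpy.random.RandomState(0)
    # int64 indices into the 111-symbol vocabulary, as expected by T.lmatrix
    x_batch = demo_rng.randint(0, 111, size=(length, batch_size)).astype('int64')
    mask_batch = numpy.ones((length, batch_size), dtype=config.floatX)
    cost_before = evaluate_model(x_batch, mask_batch)
    cost_after = update_model(x_batch, mask_batch)  # one SGD step
    return cost_before, cost_after
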
# Generating Sequences

x_t = T.iscalar()
h_p = T.vector()
c_p = T.vector()
h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b

energy_exp = T.exp(energy - T.max(energy, axis=1, keepdims=True))

output = energy_exp / energy_exp.sum(axis=1, keepdims=True)
single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])
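
# --- Hedged sampling sketch (not part of the original script) ---
# A minimal illustration of stepping the LSTM one character at a time:
# `single_step` takes an int32 index plus the previous hidden and cell
# states and returns a (1, 111) probability row. The greedy decoding and
# the helper name `_demo_sample` are assumptions; it is never called here.
def _demo_sample(start_index=0, n_chars=50):
    h = numpy.zeros(n_h, dtype=config.floatX)
    c = numpy.zeros(n_h, dtype=config.floatX)
    idx = numpy.int32(start_index)
    sampled = []
    for _ in range(n_chars):
        # _step returns (1, n_h) states, so flatten before feeding them back
        probs, h, c = single_step(idx, h.flatten(), c.flatten())
        idx = numpy.int32(probs.argmax())  # greedy: most likely next symbol
        sampled.append(int(idx))
    return sampled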