📚 The CoCalc Library - books, templates and other resources
License: OTHER
""" A neural chatbot using sequence to sequence model with1attentional decoder.23This is based on Google Translate Tensorflow model4https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/56Sequence to sequence model by Cho et al.(2014)78Created by Chip Huyen as the starter code for assignment 3,9class CS 20SI: "TensorFlow for Deep Learning Research"10cs20si.stanford.edu1112This file contains the code to build the model1314See readme.md for instruction on how to run the starter code.15"""16from __future__ import print_function1718import time1920import numpy as np21import tensorflow as tf2223import config2425class ChatBotModel(object):26def __init__(self, forward_only, batch_size):27"""forward_only: if set, we do not construct the backward pass in the model.28"""29print('Initialize new model')30self.fw_only = forward_only31self.batch_size = batch_size3233def _create_placeholders(self):34# Feeds for inputs. It's a list of placeholders35print('Create placeholders')36self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))37for i in range(config.BUCKETS[-1][0])]38self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))39for i in range(config.BUCKETS[-1][1] + 1)]40self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))41for i in range(config.BUCKETS[-1][1] + 1)]4243# Our targets are decoder inputs shifted by one (to ignore <s> symbol)44self.targets = self.decoder_inputs[1:]4546def _inference(self):47print('Create inference')48# If we use sampled softmax, we need an output projection.49# Sampled softmax only makes sense if we sample less than vocabulary size.50if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:51w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])52b = tf.get_variable('proj_b', [config.DEC_VOCAB])53self.output_projection = (w, b)5455def sampled_loss(inputs, labels):56labels = tf.reshape(labels, [-1, 1])57return tf.nn.sampled_softmax_loss(tf.transpose(w), b, inputs, labels,58config.NUM_SAMPLES, config.DEC_VOCAB)59self.softmax_loss_function = sampled_loss6061single_cell = tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE)62self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * config.NUM_LAYERS)6364def _create_loss(self):65print('Creating loss... 
\nIt might take a couple of minutes depending on how many buckets you have.')66start = time.time()67def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):68return tf.nn.seq2seq.embedding_attention_seq2seq(69encoder_inputs, decoder_inputs, self.cell,70num_encoder_symbols=config.ENC_VOCAB,71num_decoder_symbols=config.DEC_VOCAB,72embedding_size=config.HIDDEN_SIZE,73output_projection=self.output_projection,74feed_previous=do_decode)7576if self.fw_only:77self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(78self.encoder_inputs,79self.decoder_inputs,80self.targets,81self.decoder_masks,82config.BUCKETS,83lambda x, y: _seq2seq_f(x, y, True),84softmax_loss_function=self.softmax_loss_function)85# If we use output projection, we need to project outputs for decoding.86if self.output_projection:87for bucket in range(len(config.BUCKETS)):88self.outputs[bucket] = [tf.matmul(output,89self.output_projection[0]) + self.output_projection[1]90for output in self.outputs[bucket]]91else:92self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(93self.encoder_inputs,94self.decoder_inputs,95self.targets,96self.decoder_masks,97config.BUCKETS,98lambda x, y: _seq2seq_f(x, y, False),99softmax_loss_function=self.softmax_loss_function)100print('Time:', time.time() - start)101102def _creat_optimizer(self):103print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')104with tf.variable_scope('training') as scope:105self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')106107if not self.fw_only:108self.optimizer = tf.train.GradientDescentOptimizer(config.LR)109trainables = tf.trainable_variables()110self.gradient_norms = []111self.train_ops = []112start = time.time()113for bucket in range(len(config.BUCKETS)):114115clipped_grads, norm = tf.clip_by_global_norm(tf.gradients(self.losses[bucket],116trainables),117config.MAX_GRAD_NORM)118self.gradient_norms.append(norm)119self.train_ops.append(self.optimizer.apply_gradients(zip(clipped_grads, trainables),120global_step=self.global_step))121print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))122start = time.time()123124125def _create_summary(self):126pass127128def build_graph(self):129self._create_placeholders()130self._inference()131self._create_loss()132self._creat_optimizer()133self._create_summary()134135136
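A minimal usage sketch (not part of the original starter code), assuming this file is saved as model.py and that the assignment's config.py defines BUCKETS, HIDDEN_SIZE, NUM_LAYERS, ENC_VOCAB, DEC_VOCAB, NUM_SAMPLES, LR, MAX_GRAD_NORM, and a BATCH_SIZE constant. The bucketed feed dictionaries themselves are built by the assignment's chatbot.py driver, so this only shows graph construction and where a single-bucket training step would plug in.

import tensorflow as tf

import config
from model import ChatBotModel  # assumes this file is saved as model.py

# Build the full training graph: placeholders, seq2seq inference, bucketed losses, optimizer.
model = ChatBotModel(forward_only=False, batch_size=config.BATCH_SIZE)
model.build_graph()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A training step for a given bucket_id feeds, per time step, int32 vectors of
    # shape [batch_size] into model.encoder_inputs / model.decoder_inputs and float32
    # weights into model.decoder_masks, then runs the ops for that bucket, e.g.:
    #   sess.run([model.train_ops[bucket_id],
    #             model.gradient_norms[bucket_id],
    #             model.losses[bucket_id]],
    #            feed_dict=feed_dict)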