""" A neural chatbot using a sequence-to-sequence model with
an attentional decoder.

This is based on the Google Translate TensorFlow model:
https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/

Sequence-to-sequence model by Cho et al. (2014).

Created by Chip Huyen as the starter code for assignment 3,
class CS 20SI: "TensorFlow for Deep Learning Research"
cs20si.stanford.edu

This file contains the code to build the model.

See readme.md for instructions on how to run the starter code.
"""
17
from __future__ import print_function
18
19
import time
20
21
import numpy as np
22
import tensorflow as tf
23
24
import config
25
26

class ChatBotModel(object):
    def __init__(self, forward_only, batch_size):
        """forward_only: if set, we do not construct the backward pass in the model.
        """
        print('Initialize new model')
        self.fw_only = forward_only
        self.batch_size = batch_size

    def _create_placeholders(self):
        # Feeds for inputs. It's a list of placeholders.
        print('Create placeholders')
        self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][0])]
        self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][1] + 1)]
        self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
                              for i in range(config.BUCKETS[-1][1] + 1)]

        # Our targets are decoder inputs shifted by one (to ignore the <s> symbol).
        self.targets = self.decoder_inputs[1:]
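
    # Note: the placeholders are sized for the largest bucket, config.BUCKETS[-1].
    # For example, if BUCKETS were [(8, 10), (16, 19)] (illustrative values; see
    # config.py for the actual buckets), this would create 16 encoder placeholders
    # and 20 decoder/mask placeholders; the "+ 1" leaves room for the <s> GO
    # symbol prepended to every decoder sequence. Shorter buckets simply feed a
    # prefix of these placeholders, so one graph serves every bucket.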

    def _inference(self):
        print('Create inference')
        # Default to no projection and the standard softmax loss; these are
        # overridden below when sampled softmax is used, so that _create_loss
        # can reference them unconditionally.
        self.output_projection = None
        self.softmax_loss_function = None

        # If we use sampled softmax, we need an output projection.
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:
            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
            self.output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(tf.transpose(w), b, inputs, labels,
                                                  config.NUM_SAMPLES, config.DEC_VOCAB)
            self.softmax_loss_function = sampled_loss

        single_cell = tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE)
        self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * config.NUM_LAYERS)
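
    # Note on the projection above: the decoder cell emits vectors of size
    # HIDDEN_SIZE, so proj_w/proj_b map them to DEC_VOCAB logits only when
    # needed. Sampled softmax evaluates the loss against NUM_SAMPLES randomly
    # drawn negative classes instead of all DEC_VOCAB classes, which is what
    # makes training with a large decoder vocabulary affordable; at decode time
    # the full projection (output * proj_w + proj_b) is applied instead.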

    def _create_loss(self):
        print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
        start = time.time()

        def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return tf.nn.seq2seq.embedding_attention_seq2seq(
                        encoder_inputs, decoder_inputs, self.cell,
                        num_encoder_symbols=config.ENC_VOCAB,
                        num_decoder_symbols=config.DEC_VOCAB,
                        embedding_size=config.HIDDEN_SIZE,
                        output_projection=self.output_projection,
                        feed_previous=do_decode)

        if self.fw_only:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, True),
                                            softmax_loss_function=self.softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if self.output_projection:
                for bucket in range(len(config.BUCKETS)):
                    self.outputs[bucket] = [tf.matmul(output, self.output_projection[0]) + self.output_projection[1]
                                            for output in self.outputs[bucket]]
        else:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, False),
                                            softmax_loss_function=self.softmax_loss_function)
        print('Time:', time.time() - start)
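
    # Note: tf.nn.seq2seq.model_with_buckets builds one seq2seq sub-graph per
    # bucket in config.BUCKETS and returns per-bucket lists, so self.outputs[b]
    # and self.losses[b] belong to bucket b. In forward-only (chat) mode,
    # feed_previous=True makes the decoder consume its own previous prediction
    # rather than the ground-truth token, and the extra matmul re-projects the
    # HIDDEN_SIZE outputs to DEC_VOCAB logits because the sampled-softmax graph
    # keeps the decoder outputs un-projected.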

    def _create_optimizer(self):
        print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')
        with tf.variable_scope('training') as scope:
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            if not self.fw_only:
                self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
                trainables = tf.trainable_variables()
                self.gradient_norms = []
                self.train_ops = []
                start = time.time()
                for bucket in range(len(config.BUCKETS)):
                    clipped_grads, norm = tf.clip_by_global_norm(tf.gradients(self.losses[bucket],
                                                                              trainables),
                                                                 config.MAX_GRAD_NORM)
                    self.gradient_norms.append(norm)
                    self.train_ops.append(self.optimizer.apply_gradients(zip(clipped_grads, trainables),
                                                                         global_step=self.global_step))
                    print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))
                    start = time.time()
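
    # Note: one train op is created per bucket because each bucket has its own
    # loss tensor. tf.clip_by_global_norm rescales the whole gradient list so
    # that its global L2 norm is at most config.MAX_GRAD_NORM, which keeps the
    # recurrent model from diverging on occasional exploding gradients.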

    def _create_summary(self):
        pass

    def build_graph(self):
        self._create_placeholders()
        self._inference()
        self._create_loss()
        self._create_optimizer()
        self._create_summary()
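

# A minimal smoke-test sketch (illustrative only; the assignment's real train
# and chat loops live in a separate script). It assumes config.py defines
# BUCKETS, HIDDEN_SIZE, NUM_LAYERS, NUM_SAMPLES, ENC_VOCAB, DEC_VOCAB, LR and
# MAX_GRAD_NORM as in the starter code.
if __name__ == '__main__':
    model = ChatBotModel(forward_only=False, batch_size=64)
    model.build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # For one batch from bucket b, feed model.encoder_inputs,
        # model.decoder_inputs and model.decoder_masks, then run:
        #   sess.run([model.train_ops[b], model.losses[b]], feed_dict=feeds)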