import time

import numpy as np
import tensorflow as tf

import config


class ChatBotModel:
    def __init__(self, forward_only, batch_size):
        """forward_only: if set, we do not construct the backward pass in the model."""
        print('Initialize new model')
        self.fw_only = forward_only
        self.batch_size = batch_size

    def _create_placeholders(self):
        # Feeds for inputs: one placeholder per time step, sized to the
        # largest bucket.
        print('Create placeholders')
        self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][0])]
        self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][1] + 1)]
        self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
                              for i in range(config.BUCKETS[-1][1] + 1)]

        # Our targets are the decoder inputs shifted by one (to skip the <GO> symbol).
        self.targets = self.decoder_inputs[1:]
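
        # Shape illustration (assumed values, not from the original file): if
        # config.BUCKETS were [(8, 10), (12, 14), (16, 19)], the largest bucket
        # (16, 19) would yield 16 encoder placeholders and 19 + 1 = 20 decoder
        # and mask placeholders; smaller buckets feed only a prefix of these lists.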

    def _inference(self):
        print('Create inference')
        # If we use sampled softmax, we need an output projection.
        # Sampled softmax only makes sense if we sample fewer than vocabulary size.
        self.output_projection = None
        self.softmax_loss_function = None
        if 0 < config.NUM_SAMPLES < config.DEC_VOCAB:
            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
            self.output_projection = (w, b)

            def sampled_loss(logits, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(weights=tf.transpose(w),
                                                  biases=b,
                                                  inputs=logits,
                                                  labels=labels,
                                                  num_sampled=config.NUM_SAMPLES,
                                                  num_classes=config.DEC_VOCAB)

            self.softmax_loss_function = sampled_loss

        # Build a fresh GRU cell per layer; reusing a single cell object for
        # every layer makes MultiRNNCell attempt to reuse its weights across layers.
        self.cell = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.GRUCell(config.HIDDEN_SIZE) for _ in range(config.NUM_LAYERS)])
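
    # Note on the transpose above: tf.nn.sampled_softmax_loss expects weights of
    # shape [num_classes, dim], while proj_w is stored as [HIDDEN_SIZE, DEC_VOCAB]
    # so that it can also serve as the decode-time output projection.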

    def _create_loss(self):
        print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
        start = time.time()

        def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            # legacy_seq2seq deep-copies the cell internally; TF cells do not
            # deep-copy cleanly, so these no-op hooks return the original cell.
            setattr(tf.contrib.rnn.GRUCell, '__deepcopy__', lambda self, _: self)
            setattr(tf.contrib.rnn.MultiRNNCell, '__deepcopy__', lambda self, _: self)
            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs, decoder_inputs, self.cell,
                num_encoder_symbols=config.ENC_VOCAB,
                num_decoder_symbols=config.DEC_VOCAB,
                embedding_size=config.HIDDEN_SIZE,
                output_projection=self.output_projection,
                feed_previous=do_decode)

        if self.fw_only:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                self.targets,
                self.decoder_masks,
                config.BUCKETS,
                lambda x, y: _seq2seq_f(x, y, True),
                softmax_loss_function=self.softmax_loss_function)
            # If we use an output projection, we need to project outputs for decoding.
            if self.output_projection:
                for bucket in range(len(config.BUCKETS)):
                    self.outputs[bucket] = [tf.matmul(output, self.output_projection[0]) + self.output_projection[1]
                                            for output in self.outputs[bucket]]
        else:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                self.targets,
                self.decoder_masks,
                config.BUCKETS,
                lambda x, y: _seq2seq_f(x, y, False),
                softmax_loss_function=self.softmax_loss_function)
        print('Time:', time.time() - start)
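
    # In forward-only (chat) mode the loop above projects the raw RNN outputs,
    # shape [batch, HIDDEN_SIZE], through proj_w/proj_b to full-vocabulary
    # logits; during training the sampled softmax consumes the unprojected
    # outputs directly, so no such step is needed there.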

    def _create_optimizer(self):
        print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')
        with tf.variable_scope('training'):
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            if not self.fw_only:
                self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
                trainables = tf.trainable_variables()
                self.gradient_norms = []
                self.train_ops = []
                start = time.time()
                for bucket in range(len(config.BUCKETS)):
                    clipped_grads, norm = tf.clip_by_global_norm(tf.gradients(self.losses[bucket], trainables),
                                                                 config.MAX_GRAD_NORM)
                    self.gradient_norms.append(norm)
                    self.train_ops.append(self.optimizer.apply_gradients(zip(clipped_grads, trainables),
                                                                         global_step=self.global_step))
                    print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))
                    start = time.time()
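
    # Worked example of the clipping above (illustrative numbers): with
    # config.MAX_GRAD_NORM = 5.0 and a global gradient norm of 20.0,
    # tf.clip_by_global_norm rescales every gradient by 5.0 / 20.0 = 0.25;
    # if the global norm is already below the cap, gradients pass through unchanged.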

    def _create_summary(self):
        pass

    def build_graph(self):
        self._create_placeholders()
        self._inference()
        self._create_loss()
        self._create_optimizer()
        self._create_summary()
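

# Minimal usage sketch (not part of the original file; assumes the config module
# defines BUCKETS, HIDDEN_SIZE, NUM_LAYERS, NUM_SAMPLES, ENC_VOCAB, DEC_VOCAB,
# LR, and MAX_GRAD_NORM, as the class above requires):
if __name__ == '__main__':
    model = ChatBotModel(forward_only=False, batch_size=64)
    model.build_graph()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Training would then feed the encoder/decoder/mask placeholders for a
        # chosen bucket and run model.train_ops[bucket_id] step by step.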