""" A neural chatbot using a sequence-to-sequence model with
an attentional decoder.

This is based on the Google Translate TensorFlow model:
https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/

Sequence-to-sequence model by Cho et al. (2014).

Created by Chip Huyen as the starter code for assignment 3,
class CS 20SI: "TensorFlow for Deep Learning Research"
cs20si.stanford.edu

This file contains the code to build the model.

See readme.md for instructions on how to run the starter code.
"""
17
from __future__ import print_function
18
19
import time
20
21
import numpy as np
22
import tensorflow as tf
23
24
import config
25
26

class ChatBotModel(object):
    def __init__(self, forward_only, batch_size):
        """forward_only: if set, we do not construct the backward pass in the model.
        """
        print('Initialize new model')
        self.fw_only = forward_only
        self.batch_size = batch_size

    def _create_placeholders(self):
        # Feeds for inputs. It's a list of placeholders.
        print('Create placeholders')
        self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][0])]
        self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][1] + 1)]
        self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
                              for i in range(config.BUCKETS[-1][1] + 1)]

        # Our targets are decoder inputs shifted by one (to ignore the <s> symbol).
        self.targets = self.decoder_inputs[1:]
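
    # Note: the placeholders are sized for the largest bucket, config.BUCKETS[-1].
    # For example, if BUCKETS were [(8, 10), (16, 19)] (illustrative values; see
    # config.py for the actual buckets), this would create 16 encoder placeholders
    # and 20 decoder/mask placeholders; the "+ 1" leaves room for the <s> GO
    # symbol prepended to every decoder sequence. Shorter buckets simply feed a
    # prefix of these placeholders, so one graph serves every bucket.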

    def _inference(self):
        print('Create inference')
        # Default to no projection and the standard softmax loss; these are
        # overridden below when sampled softmax is used, so that _create_loss
        # can reference them unconditionally.
        self.output_projection = None
        self.softmax_loss_function = None

        # If we use sampled softmax, we need an output projection.
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:
            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
            self.output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(tf.transpose(w), b, inputs, labels,
                                                  config.NUM_SAMPLES, config.DEC_VOCAB)
            self.softmax_loss_function = sampled_loss

        single_cell = tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE)
        self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * config.NUM_LAYERS)
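
    # Note on the projection above: the decoder cell emits vectors of size
    # HIDDEN_SIZE, so proj_w/proj_b map them to DEC_VOCAB logits only when
    # needed. Sampled softmax evaluates the loss against NUM_SAMPLES randomly
    # drawn negative classes instead of all DEC_VOCAB classes, which is what
    # makes training with a large decoder vocabulary affordable; at decode time
    # the full projection (output * proj_w + proj_b) is applied instead.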

    def _create_loss(self):
        print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
        start = time.time()

        def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return tf.nn.seq2seq.embedding_attention_seq2seq(
                        encoder_inputs, decoder_inputs, self.cell,
                        num_encoder_symbols=config.ENC_VOCAB,
                        num_decoder_symbols=config.DEC_VOCAB,
                        embedding_size=config.HIDDEN_SIZE,
                        output_projection=self.output_projection,
                        feed_previous=do_decode)

        if self.fw_only:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, True),
                                            softmax_loss_function=self.softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if self.output_projection:
                for bucket in range(len(config.BUCKETS)):
                    self.outputs[bucket] = [tf.matmul(output, self.output_projection[0]) + self.output_projection[1]
                                            for output in self.outputs[bucket]]
        else:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, False),
                                            softmax_loss_function=self.softmax_loss_function)
        print('Time:', time.time() - start)
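
    # Note: tf.nn.seq2seq.model_with_buckets builds one seq2seq sub-graph per
    # bucket in config.BUCKETS and returns per-bucket lists, so self.outputs[b]
    # and self.losses[b] belong to bucket b. In forward-only (chat) mode,
    # feed_previous=True makes the decoder consume its own previous prediction
    # rather than the ground-truth token, and the extra matmul re-projects the
    # HIDDEN_SIZE outputs to DEC_VOCAB logits because the sampled-softmax graph
    # keeps the decoder outputs un-projected.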

    def _create_optimizer(self):
        print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')
        with tf.variable_scope('training') as scope:
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            if not self.fw_only:
                self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
                trainables = tf.trainable_variables()
                self.gradient_norms = []
                self.train_ops = []
                start = time.time()
                for bucket in range(len(config.BUCKETS)):
                    clipped_grads, norm = tf.clip_by_global_norm(tf.gradients(self.losses[bucket],
                                                                              trainables),
                                                                 config.MAX_GRAD_NORM)
                    self.gradient_norms.append(norm)
                    self.train_ops.append(self.optimizer.apply_gradients(zip(clipped_grads, trainables),
                                                                         global_step=self.global_step))
                    print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))
                    start = time.time()
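
    # Note: one train op is created per bucket because each bucket has its own
    # loss tensor. tf.clip_by_global_norm rescales the whole gradient list so
    # that its global L2 norm is at most config.MAX_GRAD_NORM, which keeps the
    # recurrent model from diverging on occasional exploding gradients.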

    def _create_summary(self):
        pass

    def build_graph(self):
        self._create_placeholders()
        self._inference()
        self._create_loss()
        self._create_optimizer()
        self._create_summary()
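

# A minimal smoke-test sketch (illustrative only; the assignment's real train
# and chat loops live in a separate script). It assumes config.py defines
# BUCKETS, HIDDEN_SIZE, NUM_LAYERS, NUM_SAMPLES, ENC_VOCAB, DEC_VOCAB, LR and
# MAX_GRAD_NORM as in the starter code.
if __name__ == '__main__':
    model = ChatBotModel(forward_only=False, batch_size=64)
    model.build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # For one batch from bucket b, feed model.encoder_inputs,
        # model.decoder_inputs and model.decoder_masks, then run:
        #   sess.run([model.train_ops[b], model.losses[b]], feed_dict=feeds)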