CoCalc -- model.py

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / stanford-tensorflow-tutorials / assignments / chatbot / model.py
132928 views
License: OTHER
1
import time
2

3
import numpy as np
4
import tensorflow as tf
5

6
import config
7

8
class ChatBotModel:
    """Bucketed attention seq2seq chatbot graph (TensorFlow 1.x, ``tf.contrib``).

    Usage: construct the object, then call :meth:`build_graph` once. The
    graph pieces are exposed as attributes:

    * ``encoder_inputs`` / ``decoder_inputs`` / ``decoder_masks`` / ``targets``
      -- lists of per-time-step placeholders.
    * ``output_projection`` -- ``(w, b)`` when sampled softmax is enabled,
      otherwise ``None``.
    * ``softmax_loss_function`` -- the sampled-softmax loss, or ``None`` to
      let ``model_with_buckets`` use its default loss.
    * ``outputs`` / ``losses`` -- one entry per bucket in ``config.BUCKETS``.
    * ``global_step``; and, only when the backward pass is built,
      ``optimizer``, ``gradient_norms`` and ``train_ops`` (one per bucket).
    """

    def __init__(self, forward_only, batch_size):
        """Record the build options; no graph ops are created here.

        Args:
            forward_only: if set, we do not construct the backward pass in
                the model, and the seq2seq function is built with
                ``feed_previous=True``.
            batch_size: stored on the instance; this class does not read it.
        """
        print('Initialize new model')
        self.fw_only = forward_only
        self.batch_size = batch_size
        # Defined up front so every later build step can safely compare them
        # against None, whether or not sampled softmax ends up being enabled.
        self.output_projection = None
        self.softmax_loss_function = None

    def _create_placeholders(self):
        """Create one placeholder per time step, sized for the largest bucket.

        ``config.BUCKETS[-1]`` is used for the sizes, i.e. the last bucket is
        taken to be the largest one.
        """
        # Feeds for inputs. It's a list of placeholders
        print('Create placeholders')
        max_encoder_len = config.BUCKETS[-1][0]
        # One extra decoder step: the targets below drop the first decoder
        # input, which leaves config.BUCKETS[-1][1] targets.
        max_decoder_len = config.BUCKETS[-1][1] + 1

        self.encoder_inputs = [
            tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
            for i in range(max_encoder_len)
        ]
        self.decoder_inputs = [
            tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
            for i in range(max_decoder_len)
        ]
        self.decoder_masks = [
            tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
            for i in range(max_decoder_len)
        ]

        # Our targets are decoder inputs shifted by one (to ignore <GO> symbol)
        self.targets = self.decoder_inputs[1:]

    def _inference(self):
        """Create the output projection, the loss function and the RNN cell.

        Sampled softmax is enabled only when
        ``0 < config.NUM_SAMPLES < config.DEC_VOCAB``. Otherwise both
        ``output_projection`` and ``softmax_loss_function`` are left as
        ``None`` so that the seq2seq library falls back to its own defaults.
        """
        print('Create inference')
        # Reset explicitly so the attributes are well defined even if this
        # method is called on an object that skipped __init__.
        self.output_projection = None
        self.softmax_loss_function = None

        # If we use sampled softmax, we need an output projection.
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if 0 < config.NUM_SAMPLES < config.DEC_VOCAB:
            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
            self.output_projection = (w, b)

            def sampled_loss(logits, labels):
                """Sampled-softmax loss over the projection variables above.

                ``logits`` is the tensor the seq2seq library passes as the
                model output; it is forwarded as ``inputs`` and multiplied
                by ``w`` inside ``sampled_softmax_loss``.
                """
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(weights=tf.transpose(w),
                                                  biases=b,
                                                  inputs=logits,
                                                  labels=labels,
                                                  num_sampled=config.NUM_SAMPLES,
                                                  num_classes=config.DEC_VOCAB)

            # Only install the sampled loss when its projection exists; it
            # closes over `w` and `b` and cannot work without them.
            self.softmax_loss_function = sampled_loss

        # NOTE(review): the same GRUCell instance is used for every layer.
        # Depending on the TensorFlow 1.x version this may share weights
        # across layers (or raise) -- confirm this is intended before
        # changing it, since doing so alters the set of saved variables.
        single_cell = tf.contrib.rnn.GRUCell(config.HIDDEN_SIZE)
        self.cell = tf.contrib.rnn.MultiRNNCell([single_cell for _ in range(config.NUM_LAYERS)])

    def _create_loss(self):
        """Build per-bucket outputs and losses with ``model_with_buckets``.

        Sets ``self.outputs`` and ``self.losses``. In forward-only mode with
        an output projection, the raw outputs are additionally projected so
        they can be used for decoding.
        """
        print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
        start = time.time()

        # Make deepcopy of these cell classes return the same object, so any
        # copy taken inside the seq2seq library reuses self.cell as-is.
        # Patched once here instead of on every per-bucket call.
        setattr(tf.contrib.rnn.GRUCell, '__deepcopy__', lambda self, _: self)
        setattr(tf.contrib.rnn.MultiRNNCell, '__deepcopy__', lambda self, _: self)

        # Pass a real bool: forward-only mode is exactly the mode in which
        # feed_previous is switched on.
        do_decode = bool(self.fw_only)

        def _seq2seq_f(encoder_inputs, decoder_inputs):
            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs, decoder_inputs, self.cell,
                    num_encoder_symbols=config.ENC_VOCAB,
                    num_decoder_symbols=config.DEC_VOCAB,
                    embedding_size=config.HIDDEN_SIZE,
                    output_projection=self.output_projection,
                    feed_previous=do_decode)

        self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                                    self.encoder_inputs,
                                    self.decoder_inputs,
                                    self.targets,
                                    self.decoder_masks,
                                    config.BUCKETS,
                                    _seq2seq_f,
                                    softmax_loss_function=self.softmax_loss_function)

        # If we use output projection, we need to project outputs for decoding.
        if self.fw_only and self.output_projection is not None:
            proj_w, proj_b = self.output_projection
            for bucket in range(len(config.BUCKETS)):
                self.outputs[bucket] = [tf.matmul(output, proj_w) + proj_b
                                        for output in self.outputs[bucket]]
        print('Time:', time.time() - start)

    def _create_optimizer(self):
        """Create ``global_step`` and, unless forward-only, the train ops.

        For every bucket the gradients of that bucket's loss are clipped by
        global norm (``config.MAX_GRAD_NORM``) and applied with plain
        gradient descent at learning rate ``config.LR``. The resulting norm
        and train op are appended to ``self.gradient_norms`` and
        ``self.train_ops`` in bucket order.
        """
        print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')
        with tf.variable_scope('training'):
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            if self.fw_only:
                # No backward pass requested: only global_step is created.
                return

            self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
            trainables = tf.trainable_variables()
            self.gradient_norms = []
            self.train_ops = []
            start = time.time()
            for bucket in range(len(config.BUCKETS)):
                gradients = tf.gradients(self.losses[bucket], trainables)
                clipped_grads, norm = tf.clip_by_global_norm(gradients,
                                                             config.MAX_GRAD_NORM)
                self.gradient_norms.append(norm)
                self.train_ops.append(self.optimizer.apply_gradients(zip(clipped_grads, trainables),
                                                                     global_step=self.global_step))
                print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))
                start = time.time()

    # Backward-compatible alias for the original, misspelled method name.
    _creat_optimizer = _create_optimizer

    def _create_summary(self):
        """Placeholder for summary ops; currently does nothing."""
        pass

    def build_graph(self):
        """Build the whole graph by running every construction step in order."""
        self._create_placeholders()
        self._inference()
        self._create_loss()
        self._create_optimizer()
        self._create_summary()
123
Product

Resources

Company