CoCalc -- neural_gpu

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / stanford-tensorflow-tutorials / 2017 / examples / cgru / neural_gpu_v3.py
¹³²⁹³⁷ views
License: OTHER
1
def neural_gpu(features, hparams, name=None):
  """Build the core Neural GPU graph.

  The embedded inputs serve as the initial state. That state is folded over
  the transposed `inputs`, one slice per step; every step applies dropout
  followed by `hparams.num_hidden_layers` convolutional GRU layers.

  Args:
    features: mapping that provides the input ids under the key "inputs".
      The `[1, 0]` transpose below requires a rank-2 tensor; presumably it is
      laid out as (batch, length) -- TODO confirm against the caller.
    hparams: hyperparameters; this function reads `vocab_size`,
      `hidden_size`, `dropout`, `num_hidden_layers` and `kernel_size`.
    name: optional variable scope name; "neural_gpu" is the default name.

  Returns:
    The result of `common_layers.conv` applied to the final folded state with
    `hparams.vocab_size` as its second argument.
  """
  with tf.variable_scope(name, "neural_gpu"):
    inputs = features["inputs"]
    emb_inputs = common_layers.embedding(
        inputs, hparams.vocab_size, hparams.hidden_size)

    def step(state, inp):
      """Run one fold step; entries whose `inp` equals 0 keep the old state."""
      # In TF 1.x the second argument of tf.nn.dropout is the keep
      # probability, hence the `1.0 - dropout`.
      x = tf.nn.dropout(state, 1.0 - hparams.dropout)
      # `range` instead of `xrange` so the loop also runs on Python 3.
      for layer in range(hparams.num_hidden_layers):
        x = common_layers.conv_gru(
            x, hparams.kernel_size, hparams.hidden_size, name="cgru_%d" % layer)
      # Use tf.equal rather than `==`: on TF 1.x tensors `==` is an identity
      # comparison that evaluates to a plain Python bool, so the padding mask
      # would never be built element-wise.
      is_padding = tf.equal(inp, 0)
      return tf.where(is_padding, state, x)  # No-op where inp is padding=0.

    final = tf.foldl(step, tf.transpose(inputs, [1, 0]),
                     initializer=emb_inputs,
                     parallel_iterations=1, swap_memory=True)
    return common_layers.conv(final, hparams.vocab_size, 3, padding="same")
19

20

21
def mixed_curriculum(inputs, hparams):
  """Compute the curriculum's skip decision for the given inputs.

  The length considered is dimension 1 of `inputs`, except that a uniform
  random draw falling below `hparams.curriculum_mixing_probability` replaces
  it with 0.

  Args:
    inputs: tensor whose dimension 1 is taken as the length.
    hparams: hyperparameters; this function reads
      `curriculum_mixing_probability`, `curriculum_lengths_per_step` and
      `curriculum_min_length`.

  Returns:
    A boolean tensor that is true when the considered length minus
    `curriculum_min_length` is greater than the global step divided by
    `curriculum_lengths_per_step`.
  """
  with tf.name_scope("mixed_curriculum"):
    full_length = tf.to_float(tf.shape(inputs)[1])
    mix_in = tf.less(tf.random_uniform([]),
                     hparams.curriculum_mixing_probability)

    def _zero_length():
      return tf.constant(0.0)

    def _full_length():
      return full_length

    effective_length = tf.cond(mix_in, _zero_length, _full_length)
    global_step = tf.to_float(tf.contrib.framework.get_global_step())
    length_budget = global_step / hparams.curriculum_lengths_per_step
    excess_length = effective_length - hparams.curriculum_min_length
    return tf.greater(excess_length, length_budget)
32

33

34
def neural_gpu_curriculum(features, hparams, mode):
  """Run the Neural GPU unless the curriculum decides to skip.

  Skipping can only happen when `mode` equals
  `tf.contrib.learn.ModeKeys.TRAIN`; in that case the decision comes from
  `mixed_curriculum`.

  Args:
    features: mapping that provides the inputs under the key "inputs".
    hparams: hyperparameters; `vocab_size` is read here and the object is
      passed on to `mixed_curriculum` and `neural_gpu`.
    mode: value compared against `tf.contrib.learn.ModeKeys.TRAIN`.

  Returns:
    A pair `(outputs, should_skip)`. `outputs` is a zero tensor whose shape is
    the shape of the inputs with `vocab_size` appended when skipping, and the
    `neural_gpu` result otherwise. `should_skip` is the boolean skip decision.
  """
  with tf.name_scope("neural_gpu_with_curriculum"):
    token_ids = features["inputs"]
    training = mode == tf.contrib.learn.ModeKeys.TRAIN
    skip_batch = tf.logical_and(training, mixed_curriculum(token_ids, hparams))

    ids_shape = tf.shape(token_ids)
    vocab_dim = tf.constant([hparams.vocab_size])
    skipped_shape = tf.concat([ids_shape, vocab_dim], axis=0)

    def _skipped():
      return tf.zeros(skipped_shape)

    def _computed():
      return neural_gpu(features, hparams)

    result = tf.cond(skip_batch, _skipped, _computed)
    return result, skip_batch
46

47

48
def basic_params1():
  """Return a fresh set of basic hyperparameters.

  Returns:
    An `HParams` object holding the default values listed below.
  """
  # The public TF 1.x location of HParams is tf.contrib.training; a bare
  # `tf.HParams` is not part of the released API as far as the public docs
  # show -- NOTE(review): confirm against the TensorFlow build in use.
  return tf.contrib.training.HParams(
      batch_size=32,
      num_hidden_layers=4,
      kernel_size=3,
      hidden_size=64,
      vocab_size=256,
      dropout=0.2,
      clip_grad_norm=2.0,
      initializer="orthogonal",
      initializer_gain=1.5,
      label_smoothing=0.1,
      optimizer="Adam",
      optimizer_adam_epsilon=1e-4,
      optimizer_momentum_momentum=0.9,
      max_train_length=512,
      learning_rate_decay_scheme="none",
      learning_rate_warmup_steps=100,
      learning_rate=0.1)
67

68

69
def curriculum_params1():
  """Return the basic hyperparameters extended with curriculum settings.

  Starts from `common_hparams.basic_params1()` and registers the three
  `curriculum_*` hyperparameters on it.

  Returns:
    The extended hyperparameter object.
  """
  hparams = common_hparams.basic_params1()
  # Registered in this exact order, one add_hparam call per entry.
  curriculum_settings = (
      ("curriculum_mixing_probability", 0.1),
      ("curriculum_lengths_per_step", 1000.0),
      ("curriculum_min_length", 10),
  )
  for setting_name, setting_value in curriculum_settings:
    hparams.add_hparam(setting_name, setting_value)
  return hparams
76

77
Product

Resources

Company