📚 The CoCalc Library - books, templates and other resources

License: OTHER
from six.moves import xrange  # pylint: disable=redefined-builtin

import tensorflow as tf

# common_layers / common_hparams come from the Tensor2Tensor codebase; the
# exact module path depends on the T2T version (tensor2tensor.layers in
# recent releases).
from tensor2tensor.layers import common_hparams
from tensor2tensor.layers import common_layers


def neural_gpu(features, hparams, name=None):
  """The core Neural GPU."""
  with tf.variable_scope(name, "neural_gpu"):
    inputs = features["inputs"]
    emb_inputs = common_layers.embedding(
        inputs, hparams.vocab_size, hparams.hidden_size)

    def step(state, inp):
      """One recurrent step: dropout, then a stack of convolutional GRUs."""
      x = tf.nn.dropout(state, 1.0 - hparams.dropout)
      for layer in xrange(hparams.num_hidden_layers):
        x = common_layers.conv_gru(
            x, hparams.kernel_size, hparams.hidden_size, name="cgru_%d" % layer)
      # No-op where inp is just padding=0 (tf.equal gives the boolean mask).
      return tf.where(tf.equal(inp, 0), state, x)

    # Fold the step function over the sequence positions, starting from the
    # embedded inputs as the initial state.
    final = tf.foldl(step, tf.transpose(inputs, [1, 0]),
                     initializer=emb_inputs,
                     parallel_iterations=1, swap_memory=True)
    return common_layers.conv(final, hparams.vocab_size, 3, padding="same")

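For readers without the Tensor2Tensor source at hand, the following is a minimal sketch of what a convolutional GRU step can look like when written with plain TF 1.x ops, assuming the state is a [batch, length, hidden_size] tensor. The name conv_gru_sketch and the use of tf.layers.conv1d are illustrative assumptions; the real common_layers.conv_gru has more options and may differ in detail.

def conv_gru_sketch(x, kernel_size, filters, name=None):
  """Illustrative convolutional GRU; not the Tensor2Tensor implementation."""
  with tf.variable_scope(name, "conv_gru_sketch"):
    def gate_conv(inputs, scope):
      # Same-padded 1-D convolution used for each gate.
      return tf.layers.conv1d(inputs, filters, kernel_size,
                              padding="same", name=scope)
    reset = tf.sigmoid(gate_conv(x, "reset"))
    update = tf.sigmoid(gate_conv(x, "update"))
    candidate = tf.tanh(gate_conv(reset * x, "candidate"))
    # Standard GRU interpolation: keep the old state where the update gate
    # saturates at 1, move toward the candidate elsewhere.
    return update * x + (1.0 - update) * candidate

The gated interpolation is what lets the recurrence act as a near no-op on positions it does not need to change, which matters because the same step is applied once per input position.
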
def mixed_curriculum(inputs, hparams):
  """Mixed curriculum: skip too-long batches, except with some probability."""
  with tf.name_scope("mixed_curriculum"):
    inputs_length = tf.to_float(tf.shape(inputs)[1])
    # With probability curriculum_mixing_probability, pretend the batch has
    # length 0 so that it is never skipped, whatever the schedule says.
    used_length = tf.cond(tf.less(tf.random_uniform([]),
                                  hparams.curriculum_mixing_probability),
                          lambda: tf.constant(0.0),
                          lambda: inputs_length)
    step = tf.to_float(tf.contrib.framework.get_global_step())
    relative_step = step / hparams.curriculum_lengths_per_step
    return used_length - hparams.curriculum_min_length > relative_step

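To make the schedule concrete: the condition above is equivalent to used_length > curriculum_min_length + global_step / curriculum_lengths_per_step. The helper below is a hypothetical, plain-Python restatement of that threshold (not part of the library), with defaults taken from curriculum_params1 further down.

def skip_length_threshold(global_step, min_length=10, lengths_per_step=1000.0):
  # Batches longer than this many tokens are skipped at this training step,
  # unless the random mixing branch above replaced their length with 0.
  return min_length + global_step / lengths_per_step

# Example: skip_length_threshold(5000) == 15.0, so at step 5000 only batches
# longer than 15 tokens are candidates for skipping.
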
def neural_gpu_curriculum(features, hparams, mode):
  """The Neural GPU model with curriculum."""
  with tf.name_scope("neural_gpu_with_curriculum"):
    inputs = features["inputs"]
    is_training = mode == tf.contrib.learn.ModeKeys.TRAIN
    should_skip = tf.logical_and(is_training, mixed_curriculum(inputs, hparams))
    final_shape = tf.concat([tf.shape(inputs),
                             tf.constant([hparams.vocab_size])], axis=0)
    # When the curriculum says skip, emit zeros of the right shape instead of
    # running the full Neural GPU recurrence.
    outputs = tf.cond(should_skip,
                      lambda: tf.zeros(final_shape),
                      lambda: neural_gpu(features, hparams))
    return outputs, should_skip

def basic_params1():
  """A set of basic hyperparameters."""
  # tf.contrib.training.HParams is the concrete HParams class in TF 1.x.
  return tf.contrib.training.HParams(batch_size=32,
                                     num_hidden_layers=4,
                                     kernel_size=3,
                                     hidden_size=64,
                                     vocab_size=256,
                                     dropout=0.2,
                                     clip_grad_norm=2.0,
                                     initializer="orthogonal",
                                     initializer_gain=1.5,
                                     label_smoothing=0.1,
                                     optimizer="Adam",
                                     optimizer_adam_epsilon=1e-4,
                                     optimizer_momentum_momentum=0.9,
                                     max_train_length=512,
                                     learning_rate_decay_scheme="none",
                                     learning_rate_warmup_steps=100,
                                     learning_rate=0.1)

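The HParams object returned above is usually tweaked per run rather than edited in source. A brief usage sketch, assuming the tf.contrib.training.HParams API and the locally defined basic_params1:

hparams = basic_params1()
# Override selected values from a comma-separated string, the way
# T2T-style training binaries typically do via an --hparams flag.
hparams.parse("learning_rate=0.05,batch_size=64")
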
def curriculum_params1():
  """Set of hyperparameters with curriculum settings."""
  hparams = common_hparams.basic_params1()
  hparams.add_hparam("curriculum_mixing_probability", 0.1)
  hparams.add_hparam("curriculum_lengths_per_step", 1000.0)
  hparams.add_hparam("curriculum_min_length", 10)
  return hparams

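Finally, a hypothetical end-to-end sketch of wiring these pieces into a TF 1.x graph. The placeholder feature dictionary and the mode constant are assumptions about the surrounding input pipeline, not part of the listing above.

tf.train.create_global_step()  # mixed_curriculum reads the global step.
hparams = curriculum_params1()
features = {"inputs": tf.placeholder(tf.int32, [None, None], name="inputs")}
logits, skipped = neural_gpu_curriculum(
    features, hparams, mode=tf.contrib.learn.ModeKeys.TRAIN)
# logits has shape [batch, length, vocab_size]; skipped is a scalar bool
# tensor that is True when the curriculum dropped this batch.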