📚 The CoCalc Library - books, templates and other resources
License: OTHER
def neural_gpu(features, hparams, name=None):
    """The core Neural GPU.

    Embeds ``features["inputs"]`` and uses the embedding as the initial
    state of a fold over the input positions. Each fold step applies
    dropout followed by ``hparams.num_hidden_layers`` convolutional GRU
    layers; positions whose input symbol is 0 leave the state untouched.
    The final state is passed through a convolution with
    ``hparams.vocab_size`` outputs.

    Args:
        features: mapping with an ``"inputs"`` entry. The tensor must be
            rank 2, since it is transposed with perm ``[1, 0]``;
            presumably integer ids of shape [batch, length] with 0 used
            as padding -- TODO confirm against the caller.
        hparams: hyperparameters; reads ``vocab_size``, ``hidden_size``,
            ``dropout``, ``num_hidden_layers`` and ``kernel_size``.
        name: optional variable scope name; the default scope name is
            ``"neural_gpu"``.

    Returns:
        The result of ``common_layers.conv`` applied to the final state
        (presumably per-position logits over the vocabulary).
    """
    with tf.variable_scope(name, "neural_gpu"):
        inputs = features["inputs"]
        emb_inputs = common_layers.embedding(
            inputs, hparams.vocab_size, hparams.hidden_size)

        def step(state, inp):
            """Run the conv-GRU stack once; keep the old state on padding."""
            # TF1 dropout takes a keep probability, hence 1 - dropout rate.
            x = tf.nn.dropout(state, 1.0 - hparams.dropout)
            for layer in range(hparams.num_hidden_layers):
                x = common_layers.conv_gru(
                    x, hparams.kernel_size, hparams.hidden_size,
                    name="cgru_%d" % layer)
            # No-op where inp is just padding=0.
            # tf.equal is required here: on TF1 graph tensors the Python
            # `==` operator is an identity comparison that yields a plain
            # bool, not an elementwise mask, so padding was never skipped.
            return tf.where(tf.equal(inp, 0), state, x)

        # Fold over the first axis of the transposed inputs, i.e. one step
        # per column of the original `inputs`.
        final = tf.foldl(step, tf.transpose(inputs, [1, 0]),
                         initializer=emb_inputs,
                         parallel_iterations=1, swap_memory=True)
        return common_layers.conv(final, hparams.vocab_size, 3, padding="same")


def mixed_curriculum(inputs, hparams):
    """Mixed curriculum: decide whether the current batch should be skipped.

    The batch is flagged for skipping when its length (axis 1 of
    ``inputs``) minus ``hparams.curriculum_min_length`` exceeds
    ``global_step / hparams.curriculum_lengths_per_step``, i.e. when it is
    longer than the curriculum currently allows. With probability
    ``hparams.curriculum_mixing_probability`` the length is treated as 0
    instead, so the comparison is made against ``-curriculum_min_length``
    and, for non-negative settings, the batch is kept regardless of its
    real length.

    Args:
        inputs: tensor whose axis 1 is taken as the sequence length.
        hparams: hyperparameters; reads ``curriculum_mixing_probability``,
            ``curriculum_lengths_per_step`` and ``curriculum_min_length``.

    Returns:
        A scalar boolean tensor; true means "skip this batch".
    """
    with tf.name_scope("mixed_curriculum"):
        inputs_length = tf.to_float(tf.shape(inputs)[1])
        used_length = tf.cond(tf.less(tf.random_uniform([]),
                                      hparams.curriculum_mixing_probability),
                              lambda: tf.constant(0.0),
                              lambda: inputs_length)
        step = tf.to_float(tf.contrib.framework.get_global_step())
        relative_step = step / hparams.curriculum_lengths_per_step
        return used_length - hparams.curriculum_min_length > relative_step


def neural_gpu_curriculum(features, hparams, mode):
    """The Neural GPU model with curriculum.

    During training, batches flagged by ``mixed_curriculum`` are not run
    through the model; an all-zero tensor is produced for them instead.

    Args:
        features: mapping with an ``"inputs"`` entry, forwarded to
            ``neural_gpu``.
        hparams: hyperparameters, forwarded to ``neural_gpu`` and
            ``mixed_curriculum``; ``vocab_size`` is also read here.
        mode: compared against ``tf.contrib.learn.ModeKeys.TRAIN``;
            skipping can only happen in that mode.

    Returns:
        A pair ``(outputs, should_skip)``. ``outputs`` is either the
        ``neural_gpu`` result or zeros of shape
        ``shape(inputs) + [vocab_size]``; ``should_skip`` is the boolean
        tensor that selected between them.
    """
    with tf.name_scope("neural_gpu_with_curriculum"):
        inputs = features["inputs"]
        is_training = mode == tf.contrib.learn.ModeKeys.TRAIN
        should_skip = tf.logical_and(is_training,
                                     mixed_curriculum(inputs, hparams))
        # Shape of the placeholder output: the input shape with a trailing
        # vocab_size dimension appended.
        final_shape = tf.concat([tf.shape(inputs),
                                 tf.constant([hparams.vocab_size])], axis=0)
        outputs = tf.cond(should_skip,
                          lambda: tf.zeros(final_shape),
                          lambda: neural_gpu(features, hparams))
        return outputs, should_skip


def basic_params1():
    """A set of basic hyperparameters.

    Returns:
        A ``tf.HParams`` object holding the default model, optimizer and
        learning-rate settings listed below.
    """
    return tf.HParams(batch_size=32,
                      num_hidden_layers=4,
                      kernel_size=3,
                      hidden_size=64,
                      vocab_size=256,
                      dropout=0.2,
                      clip_grad_norm=2.0,
                      initializer="orthogonal",
                      initializer_gain=1.5,
                      label_smoothing=0.1,
                      optimizer="Adam",
                      optimizer_adam_epsilon=1e-4,
                      optimizer_momentum_momentum=0.9,
                      max_train_length=512,
                      learning_rate_decay_scheme="none",
                      learning_rate_warmup_steps=100,
                      learning_rate=0.1)


def curriculum_params1():
    """Set of hyperparameters with curriculum settings.

    Starts from ``common_hparams.basic_params1()`` and adds the three
    curriculum hyperparameters read by ``mixed_curriculum``.

    Returns:
        The extended hyperparameter object.
    """
    hparams = common_hparams.basic_params1()
    hparams.add_hparam("curriculum_mixing_probability", 0.1)
    hparams.add_hparam("curriculum_lengths_per_step", 1000.0)
    hparams.add_hparam("curriculum_min_length", 10)
    return hparams