📚 The CoCalc Library - books, templates and other resources
License: OTHER
""" starter code for word2vec skip-gram model with NCE loss
CS 20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Chip Huyen ([email protected])
Lecture 04
"""

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
from tensorflow.contrib.tensorboard.plugins import projector
import tensorflow as tf

import utils
import word2vec_utils

# Model hyperparameters
VOCAB_SIZE = 50000
BATCH_SIZE = 128
EMBED_SIZE = 128 # dimension of the word embedding vectors
SKIP_WINDOW = 1 # the context window
NUM_SAMPLED = 64 # number of negative examples to sample
LEARNING_RATE = 1.0
NUM_TRAIN_STEPS = 100000
VISUAL_FLD = 'visualization'
SKIP_STEP = 5000

# Parameters for downloading data
DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
EXPECTED_BYTES = 31344016
NUM_VISUALIZE = 3000 # number of tokens to visualize


def word2vec(dataset):
    """ Build the graph for word2vec model and train it.

    Args:
        dataset: a tf.data.Dataset whose elements are
            (center_words, target_words) pairs; main() builds it with
            shapes [BATCH_SIZE] and [BATCH_SIZE, 1].

    Side effects:
        Creates the 'checkpoints' directory through utils.safe_mkdir,
        writes the graph to 'graphs/word2vec_simple', and prints the
        average loss every SKIP_STEP steps.
    """
    # Step 1: get input, output from the dataset
    with tf.name_scope('data'):
        iterator = dataset.make_initializable_iterator()
        center_words, target_words = iterator.get_next()

    # Step 2 + 3: define weights and embedding lookup.
    # In word2vec, it's actually the weights that we care about
    with tf.name_scope('embed'):
        embed_matrix = tf.get_variable('embed_matrix',
                                       shape=[VOCAB_SIZE, EMBED_SIZE],
                                       initializer=tf.random_uniform_initializer())
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')

    # Step 4: construct variables for NCE loss and define loss function
    with tf.name_scope('loss'):
        nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE],
                                     initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
        nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

        # define loss function to be NCE loss function
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                             biases=nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=NUM_SAMPLED,
                                             num_classes=VOCAB_SIZE), name='loss')

    # Step 5: define optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0 # accumulates the loss so we can report the average over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        # Close the writer even if training raises, so the event file is
        # not left open when the loop is interrupted.
        try:
            for index in range(NUM_TRAIN_STEPS):
                try:
                    loss_batch, _ = sess.run([loss, optimizer])
                    total_loss += loss_batch
                    if (index + 1) % SKIP_STEP == 0:
                        # index is zero-based, so the printed step is one less
                        # than the number of steps completed so far.
                        print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
                        total_loss = 0.0
                except tf.errors.OutOfRangeError:
                    # The dataset is exhausted: rewind it. This iteration
                    # performs no training step.
                    sess.run(iterator.initializer)
        finally:
            writer.close()


def gen():
    """ Yield every item produced by word2vec_utils.batch_gen for the
    configured download URL, vocabulary size, batch size and skip window.
    """
    yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, VOCAB_SIZE,
                                        BATCH_SIZE, SKIP_WINDOW, VISUAL_FLD)


def main():
    """ Wrap gen() in a tf.data.Dataset and train the word2vec model on it. """
    dataset = tf.data.Dataset.from_generator(gen,
                                             (tf.int32, tf.int32),
                                             (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    word2vec(dataset)


if __name__ == '__main__':
    main()