""" starter code for word2vec skip-gram model with NCE loss1Eager execution2CS 20: "TensorFlow for Deep Learning Research"3cs20.stanford.edu4Chip Huyen ([email protected]) & Akshay Agrawal ([email protected])5Lecture 046"""78import os9os.environ['TF_CPP_MIN_LOG_LEVEL']='2'1011import numpy as np12import tensorflow as tf13import tensorflow.contrib.eager as tfe1415import utils16import word2vec_utils1718tfe.enable_eager_execution()1920# Model hyperparameters21VOCAB_SIZE = 5000022BATCH_SIZE = 12823EMBED_SIZE = 128 # dimension of the word embedding vectors24SKIP_WINDOW = 1 # the context window25NUM_SAMPLED = 64 # number of negative examples to sample26LEARNING_RATE = 1.027NUM_TRAIN_STEPS = 10000028VISUAL_FLD = 'visualization'29SKIP_STEP = 50003031# Parameters for downloading data32DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'33EXPECTED_BYTES = 313440163435class Word2Vec(object):36def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):37self.vocab_size = vocab_size38self.num_sampled = num_sampled39self.embed_matrix = tfe.Variable(tf.random_uniform(40[vocab_size, embed_size]))41self.nce_weight = tfe.Variable(tf.truncated_normal(42[vocab_size, embed_size],43stddev=1.0 / (embed_size ** 0.5)))44self.nce_bias = tfe.Variable(tf.zeros([vocab_size]))4546def compute_loss(self, center_words, target_words):47"""Computes the forward pass of word2vec with the NCE loss."""48embed = tf.nn.embedding_lookup(self.embed_matrix, center_words)49loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight,50biases=self.nce_bias,51labels=target_words,52inputs=embed,53num_sampled=self.num_sampled,54num_classes=self.vocab_size))55return loss565758def gen():59yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,60VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,61VISUAL_FLD)6263def main():64dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),65(tf.TensorShape([BATCH_SIZE]),66tf.TensorShape([BATCH_SIZE, 1])))67optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)68model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)69grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)70total_loss = 0.0 # for average loss in the last SKIP_STEP steps71num_train_steps = 072while num_train_steps < NUM_TRAIN_STEPS:73for center_words, target_words in tfe.Iterator(dataset):74if num_train_steps >= NUM_TRAIN_STEPS:75break76loss_batch, grads = grad_fn(center_words, target_words)77total_loss += loss_batch78optimizer.apply_gradients(grads)79if (num_train_steps + 1) % SKIP_STEP == 0:80print('Average loss at step {}: {:5.1f}'.format(81num_train_steps, total_loss / SKIP_STEP))82total_loss = 0.083num_train_steps += 1848586if __name__ == '__main__':87main()888990