# CoCalc -- 04_word2vec_eager
#
# 📚 The CoCalc Library - books, templates and other resources
# cocalc-examples / stanford-tensorflow-tutorials / examples / 04_word2vec_eager_starter.py
# 132923 views
# License: OTHER
""" starter code for word2vec skip-gram model with NCE loss
Eager execution
CS 20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Chip Huyen ([email protected]) & Akshay Agrawal ([email protected])
Lecture 04
"""

import os
# Set before TensorFlow is imported below so the setting is already in place
# when TensorFlow loads; it is meant to quiet TensorFlow's C++ log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe

import utils
import word2vec_utils

# Enable eager execution!
# (Do this here, at program start-up, before any TensorFlow ops, variables
# or datasets are created.)
#############################
########## TO DO ############
#############################

# Model hyperparameters
VOCAB_SIZE = 50000          # vocabulary size handed to the batch generator
BATCH_SIZE = 128            # examples per batch; also fixes the dataset shapes in main()
EMBED_SIZE = 128            # dimension of the word embedding vectors
SKIP_WINDOW = 1             # the context window
NUM_SAMPLED = 64            # number of negative examples to sample
LEARNING_RATE = 1.0         # step size for the gradient descent optimizer in main()
NUM_TRAIN_STEPS = 100000    # total number of optimization steps run by main()
VISUAL_FLD = 'visualization'  # folder name handed to word2vec_utils.batch_gen
SKIP_STEP = 5000            # main() prints the average loss every SKIP_STEP steps

# Parameters for downloading data
DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
# Passed to word2vec_utils.batch_gen together with DOWNLOAD_URL; presumably the
# expected size in bytes of the downloaded archive (confirm in word2vec_utils).
EXPECTED_BYTES = 31344016

class Word2Vec(object):
  """Skeleton of the skip-gram word2vec model trained with NCE loss.

  This is exercise scaffolding: the trainable variables and the loss are
  left as TO DO placeholders (all `None`) for the reader to fill in.
  """

  def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):
    """Stores the model configuration and declares the variable slots.

    Args:
      vocab_size: number of words in the vocabulary.
      embed_size: dimension of the embedding vectors. Not used until the
        TO DO below is completed.
      num_sampled: number of negative examples to sample for the NCE loss.
    """
    self.vocab_size = vocab_size
    self.num_sampled = num_sampled
    # Create the variables: an embedding matrix, nce_weight, and nce_bias
    #############################
    ########## TO DO ############
    #############################
    # Placeholders until the variables above are created.
    self.embed_matrix = None
    self.nce_weight = None
    self.nce_bias = None

  def compute_loss(self, center_words, target_words):
    """Computes the forward pass of word2vec with the NCE loss.

    Args:
      center_words: batch of center-word ids. main() feeds int32 tensors of
        shape [BATCH_SIZE].
      target_words: batch of target-word ids. main() feeds int32 tensors of
        shape [BATCH_SIZE, 1].

    Returns:
      The loss for the batch. Currently `None`, because both steps below
      are still TO DO.
    """
    # Look up the embeddings for the center words
    #############################
    ########## TO DO ############
    #############################
    embed = None

    # Compute the loss, using tf.reduce_mean and tf.nn.nce_loss
    #############################
    ########## TO DO ############
    #############################
    loss = None
    return loss

def gen():
  """Yields training batches from `word2vec_utils.batch_gen`.

  A zero-argument wrapper that binds the module-level configuration, so it
  can be passed to `tf.data.Dataset.from_generator` in main(). main()
  declares each yielded item as a pair of int32 arrays with shapes
  [BATCH_SIZE] and [BATCH_SIZE, 1].
  """
  yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,
                                      VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,
                                      VISUAL_FLD)

def main():
  """Builds the input pipeline and runs the training loop.

  Runs NUM_TRAIN_STEPS steps over batches from gen() and prints the average
  loss every SKIP_STEP steps. This is exercise scaffolding: the model, the
  gradient function and the per-step update are TO DO placeholders. Until
  they are completed, `model` and `grad_fn` are `None` and `total_loss` is
  never updated, so the printed average stays at 0.0.
  """
  # Each element is (center_words, target_words): int32 tensors of shape
  # [BATCH_SIZE] and [BATCH_SIZE, 1].
  dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
                              (tf.TensorShape([BATCH_SIZE]),
                              tf.TensorShape([BATCH_SIZE, 1])))
  optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
  # Create the model
  #############################
  ########## TO DO ############
  #############################
  model = None

  # Create the gradients function, using `tfe.implicit_value_and_gradients`
  #############################
  ########## TO DO ############
  #############################
  grad_fn = None

  total_loss = 0.0  # for average loss in the last SKIP_STEP steps
  num_train_steps = 0
  # The outer loop restarts iteration over the dataset if it is exhausted
  # before NUM_TRAIN_STEPS steps have been taken.
  while num_train_steps < NUM_TRAIN_STEPS:
    for center_words, target_words in tfe.Iterator(dataset):
      if num_train_steps >= NUM_TRAIN_STEPS:
        break

      # Compute the loss and gradients, and take an optimization step.
      # (Also add the batch loss to `total_loss`, which the report below
      # averages over the last SKIP_STEP steps.)
      #############################
      ########## TO DO ############
      #############################

      # Report on every SKIP_STEP-th step; the printed step number is the
      # zero-based index of the step just completed.
      if (num_train_steps + 1) % SKIP_STEP == 0:
        print('Average loss at step {}: {:5.1f}'.format(
                num_train_steps, total_loss / SKIP_STEP))
        total_loss = 0.0
      num_train_steps += 1

# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Product

# Resources

# Company