📚 The CoCalc Library - books, templates and other resources
License: OTHER
""" Using convolutional net on MNIST dataset of handwritten digit1(http://yann.lecun.com/exdb/mnist/)2Author: Chip Huyen3Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"4cs20si.stanford.edu5"""67from __future__ import absolute_import8from __future__ import division9from __future__ import print_function1011import os12os.environ['TF_CPP_MIN_LOG_LEVEL']='2'1314import time1516import tensorflow as tf17import tf.contrib.layers as layers18from tensorflow.examples.tutorials.mnist import input_data1920import utils2122N_CLASSES = 102324# Step 1: Read in data25# using TF Learn's built in function to load MNIST data to the folder data/mnist26mnist = input_data.read_data_sets("/data/mnist", one_hot=True)2728# Step 2: Define paramaters for the model29LEARNING_RATE = 0.00130BATCH_SIZE = 12831SKIP_STEP = 1032DROPOUT = 0.7533N_EPOCHS = 13435# Step 3: create placeholders for features and labels36# each image in the MNIST data is of shape 28*28 = 78437# therefore, each image is represented with a 1x784 tensor38# We'll be doing dropout for hidden layer so we'll need a placeholder39# for the dropout probability too40# Use None for shape so we can change the batch_size once we've built the graph41with tf.name_scope('data'):42X = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")43Y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")4445dropout = tf.placeholder(tf.float32, name='dropout')4647# Step 4 + 5: create weights + do inference48# the model is conv -> relu -> pool -> conv -> relu -> pool -> fully connected -> softmax4950global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')5152with tf.variable_scope('conv1') as scope:53# first, reshape the image to [BATCH_SIZE, 28, 28, 1] to make it work with tf.nn.conv2d54images = tf.reshape(X, shape=[-1, 28, 28, 1])55kernel = tf.get_variable('kernel', [5, 5, 1, 32],56initializer=tf.truncated_normal_initializer())57biases = tf.get_variable('biases', [32],58initializer=tf.random_normal_initializer())59conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')60conv1 = tf.nn.relu(conv + biases, name=scope.name)6162# output is of dimension BATCH_SIZE x 28 x 28 x 3263conv1 = layers.conv2d(images, 32, 5, 1, activation_fn=tf.nn.relu, padding='SAME')6465with tf.variable_scope('pool1') as scope:66pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],67padding='SAME')6869# output is of dimension BATCH_SIZE x 14 x 14 x 327071with tf.variable_scope('conv2') as scope:72# similar to conv1, except kernel now is of the size 5 x 5 x 32 x 6473kernel = tf.get_variable('kernels', [5, 5, 32, 64],74initializer=tf.truncated_normal_initializer())75biases = tf.get_variable('biases', [64],76initializer=tf.random_normal_initializer())77conv = tf.nn.conv2d(pool1, kernel, strides=[1, 1, 1, 1], padding='SAME')78conv2 = tf.nn.relu(conv + biases, name=scope.name)7980# output is of dimension BATCH_SIZE x 14 x 14 x 6481# layers.conv2d(images, 64, 5, 1, activation_fn=tf.nn.relu, padding='SAME')8283with tf.variable_scope('pool2') as scope:84# similar to pool185pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],86padding='SAME')8788# output is of dimension BATCH_SIZE x 7 x 7 x 648990with tf.variable_scope('fc') as scope:91# use weight of dimension 7 * 7 * 64 x 102492input_features = 7 * 7 * 6493w = tf.get_variable('weights', [input_features, 1024],94initializer=tf.truncated_normal_initializer())95b = tf.get_variable('biases', [1024],96initializer=tf.constant_initializer(0.0))9798# 
reshape pool2 to 2 dimensional99pool2 = tf.reshape(pool2, [-1, input_features])100fc = tf.nn.relu(tf.matmul(pool2, w) + b, name='relu')101102# pool2 = layers.flatten(pool2)103# fc = layers.fully_connected(pool2, 1024, tf.nn.relu)104105fc = tf.nn.dropout(fc, dropout, name='relu_dropout')106107with tf.variable_scope('softmax_linear') as scope:108w = tf.get_variable('weights', [1024, N_CLASSES],109initializer=tf.truncated_normal_initializer())110b = tf.get_variable('biases', [N_CLASSES],111initializer=tf.random_normal_initializer())112logits = tf.matmul(fc, w) + b113114115116117# Step 6: define loss function118# use softmax cross entropy with logits as the loss function119# compute mean cross entropy, softmax is applied internally120with tf.name_scope('loss'):121entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)122loss = tf.reduce_mean(entropy, name='loss')123124with tf.name_scope('summaries'):125tf.summary.scalar('loss', loss)126tf.summary.histogram('histogram loss', loss)127summary_op = tf.summary.merge_all()128129# Step 7: define training op130# using gradient descent with learning rate of LEARNING_RATE to minimize cost131optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss,132global_step=global_step)133134utils.make_dir('checkpoints')135utils.make_dir('checkpoints/convnet_mnist')136137with tf.Session() as sess:138sess.run(tf.global_variables_initializer())139saver = tf.train.Saver()140# to visualize using TensorBoard141writer = tf.summary.FileWriter('./graphs/convnet', sess.graph)142ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/convnet_mnist/checkpoint'))143# if that checkpoint exists, restore from checkpoint144if ckpt and ckpt.model_checkpoint_path:145saver.restore(sess, ckpt.model_checkpoint_path)146147initial_step = global_step.eval()148149start_time = time.time()150n_batches = int(mnist.train.num_examples / BATCH_SIZE)151152total_loss = 0.0153for index in range(initial_step, n_batches * N_EPOCHS): # train the model n_epochs times154X_batch, Y_batch = mnist.train.next_batch(BATCH_SIZE)155_, loss_batch, summary = sess.run([optimizer, loss, summary_op],156feed_dict={X: X_batch, Y:Y_batch, dropout: DROPOUT})157writer.add_summary(summary, global_step=index)158total_loss += loss_batch159if (index + 1) % SKIP_STEP == 0:160print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / SKIP_STEP))161total_loss = 0.0162saver.save(sess, 'checkpoints/convnet_mnist/mnist-convnet', index)163164print("Optimization Finished!") # should be around 0.35 after 25 epochs165print("Total time: {0} seconds".format(time.time() - start_time))166167# test the model168n_batches = int(mnist.test.num_examples/BATCH_SIZE)169total_correct_preds = 0170for i in range(n_batches):171X_batch, Y_batch = mnist.test.next_batch(BATCH_SIZE)172_, loss_batch, logits_batch = sess.run([optimizer, loss, logits],173feed_dict={X: X_batch, Y:Y_batch, dropout: 1.0})174preds = tf.nn.softmax(logits_batch)175correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y_batch, 1))176accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))177total_correct_preds += sess.run(accuracy)178179print("Accuracy {0}".format(total_correct_preds/mnist.test.num_examples))180181
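The script imports a small companion module, utils, which is not included in this file; the only helper used here is utils.make_dir. Below is a minimal sketch of what such a helper presumably looks like, inferred from how it is called above rather than taken from the course's original utils.py:

import os

def make_dir(path):
    """ Create a directory if it does not already exist. """
    try:
        os.mkdir(path)
    except OSError:
        pass

With this sketch, make_dir('checkpoints') succeeds whether or not the directory already exists, which is why the script can be restarted and resume from the saved checkpoint.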
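The loss summaries and the graph written to ./graphs/convnet can typically be inspected by running TensorBoard from the same working directory, e.g. tensorboard --logdir=./graphs/convnet, and opening the local URL it prints in a browser.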