📚 The CoCalc Library - books, templates and other resources
cocalc-examples / stanford-tensorflow-tutorials / 2017 / assignments / style_transfer / style_transfer.py
132930 views · License: OTHER
""" An implementation of the paper "A Neural Algorithm of Artistic Style"1by Gatys et al. in TensorFlow.23Author: Chip Huyen ([email protected])4Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"5For more details, please read the assignment handout:6http://web.stanford.edu/class/cs20si/assignments/a2.pdf7"""8from __future__ import print_function910import os11os.environ['TF_CPP_MIN_LOG_LEVEL']='2'12import time1314import numpy as np15import tensorflow as tf1617import vgg_model18import utils1920# parameters to manage experiments21STYLE = 'guernica'22CONTENT = 'deadpool'23STYLE_IMAGE = 'styles/' + STYLE + '.jpg'24CONTENT_IMAGE = 'content/' + CONTENT + '.jpg'25IMAGE_HEIGHT = 25026IMAGE_WIDTH = 33327NOISE_RATIO = 0.6 # percentage of weight of the noise for intermixing with the content image2829CONTENT_WEIGHT = 0.0130STYLE_WEIGHT = 13132# Layers used for style features. You can change this.33STYLE_LAYERS = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']34W = [0.5, 1.0, 1.5, 3.0, 4.0] # give more weights to deeper layers.3536# Layer used for content features. 
You can change this.37CONTENT_LAYER = 'conv4_2'3839ITERS = 30040LR = 2.04142MEAN_PIXELS = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))43""" MEAN_PIXELS is defined according to description on their github:44https://gist.github.com/ksimonyan/211839e770f7b538e2d845'In the paper, the model is denoted as the configuration D trained with scale jittering.46The input images should be zero-centered by mean pixel (rather than mean image) subtraction.47Namely, the following BGR values should be subtracted: [103.939, 116.779, 123.68].'48"""4950# VGG-19 parameters file51VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'52VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'53EXPECTED_BYTES = 5349047835455def _create_content_loss(p, f):56""" Calculate the loss between the feature representation of the57content image and the generated image.5859Inputs:60p, f are just P, F in the paper61(read the assignment handout if you're confused)62Note: we won't use the coefficient 0.5 as defined in the paper63but the coefficient as defined in the assignment handout.64Output:65the content loss6667"""68return tf.reduce_sum((f - p) ** 2) / (4.0 * p.size)6970def _gram_matrix(F, N, M):71""" Create and return the gram matrix for tensor F72Hint: you'll first have to reshape F73"""74F = tf.reshape(F, (M, N))75return tf.matmul(tf.transpose(F), F)7677def _single_style_loss(a, g):78""" Calculate the style loss at a certain layer79Inputs:80a is the feature representation of the real image81g is the feature representation of the generated image82Output:83the style loss at a certain layer (which is E_l in the paper)8485Hint: 1. you'll have to use the function _gram_matrix()862. we'll use the same coefficient for style loss as in the paper873. 
a and g are feature representation, not gram matrices88"""89N = a.shape[3] # number of filters90M = a.shape[1] * a.shape[2] # height times width of the feature map91A = _gram_matrix(a, N, M)92G = _gram_matrix(g, N, M)93return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))9495def _create_style_loss(A, model):96""" Return the total style loss97"""98n_layers = len(STYLE_LAYERS)99E = [_single_style_loss(A[i], model[STYLE_LAYERS[i]]) for i in range(n_layers)]100101###############################102## TO DO: return total style loss103return sum([W[i] * E[i] for i in range(n_layers)])104###############################105106def _create_losses(model, input_image, content_image, style_image):107with tf.variable_scope('loss') as scope:108with tf.Session() as sess:109sess.run(input_image.assign(content_image)) # assign content image to the input variable110p = sess.run(model[CONTENT_LAYER])111content_loss = _create_content_loss(p, model[CONTENT_LAYER])112113with tf.Session() as sess:114sess.run(input_image.assign(style_image))115A = sess.run([model[layer_name] for layer_name in STYLE_LAYERS])116style_loss = _create_style_loss(A, model)117118##########################################119## TO DO: create total loss.120## Hint: don't forget the content loss and style loss weights121total_loss = CONTENT_WEIGHT * content_loss + STYLE_WEIGHT * style_loss122##########################################123124return content_loss, style_loss, total_loss125126def _create_summary(model):127""" Create summary ops necessary128Hint: don't forget to merge them129"""130with tf.name_scope('summaries'):131tf.summary.scalar('content loss', model['content_loss'])132tf.summary.scalar('style loss', model['style_loss'])133tf.summary.scalar('total loss', model['total_loss'])134tf.summary.histogram('histogram content loss', model['content_loss'])135tf.summary.histogram('histogram style loss', model['style_loss'])136tf.summary.histogram('histogram total loss', model['total_loss'])137return 
tf.summary.merge_all()138139def train(model, generated_image, initial_image):140""" Train your model.141Don't forget to create folders for checkpoints and outputs.142"""143skip_step = 1144with tf.Session() as sess:145saver = tf.train.Saver()146###############################147## TO DO:148## 1. initialize your variables149## 2. create writer to write your graph150saver = tf.train.Saver()151sess.run(tf.global_variables_initializer())152writer = tf.summary.FileWriter('graphs', sess.graph)153###############################154sess.run(generated_image.assign(initial_image))155ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))156if ckpt and ckpt.model_checkpoint_path:157saver.restore(sess, ckpt.model_checkpoint_path)158initial_step = model['global_step'].eval()159160start_time = time.time()161for index in range(initial_step, ITERS):162if index >= 5 and index < 20:163skip_step = 10164elif index >= 20:165skip_step = 20166167sess.run(model['optimizer'])168if (index + 1) % skip_step == 0:169###############################170## TO DO: obtain generated image and loss171gen_image, total_loss, summary = sess.run([generated_image, model['total_loss'],172model['summary_op']])173174###############################175gen_image = gen_image + MEAN_PIXELS176writer.add_summary(summary, global_step=index)177print('Step {}\n Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))178print(' Loss: {:5.1f}'.format(total_loss))179print(' Time: {}'.format(time.time() - start_time))180start_time = time.time()181182filename = 'outputs/%d.png' % (index)183utils.save_image(filename, gen_image)184185if (index + 1) % 20 == 0:186saver.save(sess, 'checkpoints/style_transfer', index)187188def main():189with tf.variable_scope('input') as scope:190# use variable instead of placeholder because we're training the intial image to make it191# look like both the content image and the style image192input_image = tf.Variable(np.zeros([1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]), 
dtype=tf.float32)193194utils.download(VGG_DOWNLOAD_LINK, VGG_MODEL, EXPECTED_BYTES)195utils.make_dir('checkpoints')196utils.make_dir('outputs')197model = vgg_model.load_vgg(VGG_MODEL, input_image)198model['global_step'] = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')199200content_image = utils.get_resized_image(CONTENT_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)201content_image = content_image - MEAN_PIXELS202style_image = utils.get_resized_image(STYLE_IMAGE, IMAGE_HEIGHT, IMAGE_WIDTH)203style_image = style_image - MEAN_PIXELS204205model['content_loss'], model['style_loss'], model['total_loss'] = _create_losses(model,206input_image, content_image, style_image)207###############################208## TO DO: create optimizer209model['optimizer'] = tf.train.AdamOptimizer(LR).minimize(model['total_loss'],210global_step=model['global_step'])211###############################212model['summary_op'] = _create_summary(model)213214initial_image = utils.generate_noise_image(content_image, IMAGE_HEIGHT, IMAGE_WIDTH, NOISE_RATIO)215train(model, input_image, initial_image)216217if __name__ == '__main__':218main()219220221