Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/master/C4 - Convolutional Neural Networks/Week 4/Neural Style Transfer/nst_utils.py
Views: 4818
### Part of this code is due to the MatConvNet team and is used to load the parameters of the pretrained VGG19 model in the notebook ###

import os
import sys
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
# NOTE(review): the file path suggests this module *is* nst_utils, which would
# make this a wildcard self-import. Kept as-is; confirm it can be dropped.
from nst_utils import *

import numpy as np
import tensorflow as tf


class CONFIG:
    """Shared settings used by the helpers in this module."""

    IMAGE_WIDTH = 400
    IMAGE_HEIGHT = 300
    COLOR_CHANNELS = 3
    NOISE_RATIO = 0.6
    # Subtracted by reshape_and_normalize_image() and added back by
    # save_image(); shaped (1, 1, 1, 3) so it broadcasts over a 4-D array
    # whose last axis has length 3.
    MEANS = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
    VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat'  # The VGG 19-layer model from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".
    STYLE_IMAGE = 'images/stone_style.jpg'  # Style image to use.
    CONTENT_IMAGE = 'images/content300.jpg'  # Content image to use.
    OUTPUT_DIR = 'output/'


def load_vgg_model(path):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wrap using the TensorFlow
    Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.
    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax

    Arguments:
    path -- location of the .mat file, passed to scipy.io.loadmat

    Returns:
    graph -- dict mapping 'input', 'conv1_1' ... 'conv5_4' and
             'avgpool1' ... 'avgpool5' to the corresponding TensorFlow
             objects. graph['input'] is a float32 tf.Variable of zeros with
             shape (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS).

    Raises:
    AssertionError -- if a layer stored in the file does not carry the
                      name expected at that index.
    """

    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        # Explicit raise instead of a bare `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if not (layer_name == expected_layer_name):
            raise AssertionError(
                "VGG layer %s is named %r, expected %r"
                % (layer, layer_name, expected_layer_name)
            )
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        # Flatten the bias to 1-D so it broadcasts over the channel axis.
        b = tf.constant(np.reshape(b, (b.size,)))
        # The kernel is passed positionally: its keyword is `filter` in the
        # TF1 API and `filters` in TF2, while the position is the same.
        return tf.nn.conv2d(prev_layer, W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    graph = {}
    graph['input'] = tf.Variable(np.zeros((1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])

    return graph


def generate_noise_image(content_image, noise_ratio=CONFIG.NOISE_RATIO):
    """
    Generates a noisy image by adding random noise to the content_image

    Arguments:
    content_image -- array that must broadcast against shape
                     (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)
    noise_ratio -- weight given to the noise; the content image gets
                   (1 - noise_ratio)

    Returns:
    input_image -- noise_image * noise_ratio + content_image * (1 - noise_ratio)
    """

    # Generate a random noise_image, uniform in [-20, 20), sized from CONFIG
    noise_image = np.random.uniform(-20, 20, (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)).astype('float32')

    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)

    return input_image


def reshape_and_normalize_image(image):
    """
    Reshape and normalize the input image (content or style)

    Arguments:
    image -- array; a leading axis of length 1 is prepended to its shape

    Returns:
    image -- the reshaped array with CONFIG.MEANS subtracted
    """

    # Reshape image to match the expected input of the VGG model
    image = np.reshape(image, ((1,) + image.shape))

    # Subtract the mean to match the expected input of the VGG model
    image = image - CONFIG.MEANS

    return image


def save_image(path, image):
    """
    Un-normalize `image` and write its first element to `path`.

    Arguments:
    path -- destination file name, passed to PIL's Image.save
    image -- array whose first element (image[0]) is the picture to save;
             CONFIG.MEANS is added back before clipping to [0, 255]
    """

    # Un-normalize the image so that it looks good
    image = image + CONFIG.MEANS

    # Clip and Save the image
    image = np.clip(image[0], 0, 255).astype('uint8')
    # scipy.misc.imsave was removed in SciPy 1.2; write the uint8 array
    # through Pillow, which this module already imports.
    Image.fromarray(image).save(path)