Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/master/C5 - Sequence Models/Week 2/Emojify/emo_utils.py
Views: 4819
"""Helper utilities for the Emojify exercise.

Provides loaders for GloVe vectors and the emoji CSV data set, small numeric
helpers (softmax, one-hot encoding), emoji rendering of labels, and
evaluation helpers (prediction, accuracy print-out, confusion-matrix plot).
"""

import csv

import emoji
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Not used inside this module; kept so that code doing
# `from emo_utils import *` keeps seeing the same names as before.
from sklearn.metrics import confusion_matrix  # noqa: F401


def read_glove_vecs(glove_file):
    """Load GloVe-style word vectors from a whitespace-separated text file.

    Each non-empty line is expected to hold a word followed by its vector
    components. If a word appears more than once, the last vector wins.

    Arguments:
    glove_file -- path to the vectors file (read as UTF-8)

    Returns:
    words_to_index -- dict mapping each word to its 1-based rank in sorted order
    index_to_words -- dict with the inverse mapping (index -> word)
    word_to_vec_map -- dict mapping each word to a float64 numpy vector
    """
    words = set()
    word_to_vec_map = {}

    # Explicit encoding: relying on the platform default breaks on systems
    # whose locale encoding is not UTF-8.
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            curr_word = fields[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(fields[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # Indices start at 1, so index 0 is never assigned to a word.
    for i, w in enumerate(sorted(words), start=1):
        words_to_index[w] = i
        index_to_words[i] = w

    return words_to_index, index_to_words, word_to_vec_map


def softmax(x):
    """Compute the softmax of x, normalising over ALL elements of the array.

    The maximum is subtracted before exponentiation for numerical stability;
    this does not change the result.
    """
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()


def read_csv(filename='data/emojify_data.csv'):
    """Read (phrase, label) pairs from the first two columns of a CSV file.

    Arguments:
    filename -- path to the CSV file; column 0 is the phrase, column 1 the
                integer label

    Returns:
    X -- numpy array of phrases (strings)
    Y -- numpy array of integer labels
    """
    phrases = []
    labels = []  # not named `emoji`: that would shadow the imported module

    # newline='' is what the csv module documents for files passed to reader().
    with open(filename, newline='') as csv_data_file:
        for row in csv.reader(csv_data_file):
            if not row:
                # csv.reader yields [] for blank lines; nothing to read there.
                continue
            phrases.append(row[0])
            labels.append(row[1])

    X = np.asarray(phrases)
    Y = np.asarray(labels, dtype=int)

    return X, Y


def convert_to_one_hot(Y, C):
    """One-hot encode integer labels.

    Arguments:
    Y -- numpy integer array of labels in [0, C); it is flattened first
    C -- number of classes

    Returns:
    numpy array of shape (Y.size, C) with a single 1 per row
    """
    Y = np.eye(C)[Y.reshape(-1)]
    return Y


# Label -> emoji code. Label "0" uses the literal red-heart code points rather
# than an alias, because the heart alias can render as a black heart
# depending on the font.
emoji_dictionary = {"0": "\u2764\ufe0f",
                    "1": ":baseball:",
                    "2": ":smile:",
                    "3": ":disappointed:",
                    "4": ":fork_and_knife:"}


def label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji code (string) ready to be printed

    Raises KeyError if the label is not present in emoji_dictionary.
    """
    code = emoji_dictionary[str(label)]
    try:
        return emoji.emojize(code, use_aliases=True)
    except TypeError:
        # emoji >= 2.0 removed the `use_aliases` keyword; alias names are
        # selected through `language='alias'` instead.
        return emoji.emojize(code, language='alias')


def print_predictions(X, pred):
    """Print every sentence in X followed by the emoji for its predicted label.

    Arguments:
    X -- numpy array of sentences
    pred -- predictions indexable by example; each entry is a scalar or a
            one-element array holding the label
    """
    print()
    for i in range(X.shape[0]):
        # .item() extracts the scalar explicitly; calling int() directly on a
        # one-element array is deprecated in recent NumPy releases.
        label = int(np.asarray(pred[i]).item())
        print(X[i], label_to_emoji(label))


def plot_confusion_matrix(y_actu, y_pred, title='Confusion matrix', cmap=plt.cm.gray_r):
    """Plot the confusion matrix (with margins) of actual vs. predicted labels.

    Arguments:
    y_actu -- actual labels
    y_pred -- predicted labels, numpy array; flattened to shape (m,)
    title -- accepted for backward compatibility; currently not drawn
    cmap -- matplotlib colormap used for the matrix
    """
    df_confusion = pd.crosstab(y_actu, y_pred.reshape(y_pred.shape[0],),
                               rownames=['Actual'], colnames=['Predicted'],
                               margins=True)

    plt.matshow(df_confusion, cmap=cmap)
    plt.colorbar()
    tick_marks = np.arange(len(df_confusion.columns))
    plt.xticks(tick_marks, df_confusion.columns, rotation=45)
    plt.yticks(tick_marks, df_confusion.index)
    plt.ylabel(df_confusion.index.name)
    plt.xlabel(df_confusion.columns.name)


def predict(X, Y, W, b, word_to_vec_map):
    """
    Given X (sentences) and Y (emoji indices), predict emojis and compute the accuracy of your model over the given set.

    Each sentence is lower-cased, split on whitespace and represented by the
    average of the vectors of its known words (a zero vector if none are
    known); the prediction is argmax(softmax(W . avg + b)).

    Arguments:
    X -- input data containing sentences, numpy array of shape (m, None)
    Y -- labels, containing index of the label emoji, numpy array of shape (m, 1)
    W -- weight matrix applied to the averaged word vector
    b -- bias added to W . avg
    word_to_vec_map -- dict mapping words to their embedding vectors

    Returns:
    pred -- numpy array of shape (m, 1) with your predictions

    Raises IndexError if word_to_vec_map is empty.
    """
    m = X.shape[0]
    pred = np.zeros((m, 1))

    # Embedding dimension, read from an arbitrary vector without materialising
    # the full key list (the vocabulary can be very large).
    try:
        any_vector = next(iter(word_to_vec_map.values()))
    except StopIteration:
        raise IndexError("word_to_vec_map is empty") from None
    n_h = any_vector.shape[0]

    for j in range(m):  # Loop over examples

        # Split jth example (sentence) into list of lower case words
        words = X[j].lower().split()

        # Average the vectors of the words present in the vocabulary
        avg = np.zeros((n_h,))
        count = 0
        for w in words:
            if w in word_to_vec_map:
                avg += word_to_vec_map[w]
                count += 1

        if count > 0:
            avg = avg / count

        # Forward propagation
        Z = np.dot(W, avg) + b
        A = softmax(Z)
        pred[j] = np.argmax(A)

    print("Accuracy: " + str(np.mean(pred == Y.reshape(Y.shape[0], 1))))

    return pred