CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
y33-j3T

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: y33-j3T/Coursera-Deep-Learning
Path: blob/master/Sequence Models/Week 2/Emojify/emo_utils.py
Views: 13377
1
import csv
2
import numpy as np
3
import emoji
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.metrics import confusion_matrix
7
8
def read_glove_vecs(glove_file):
    """
    Load word embeddings from a whitespace-separated text file.

    Each non-empty line holds a word followed by the components of its vector.

    Arguments:
    glove_file -- path to the embeddings text file

    Returns:
    words_to_index -- dictionary mapping each word to its index (starting at 1, in sorted word order)
    index_to_words -- dictionary mapping each index back to its word
    word_to_vec_map -- dictionary mapping each word to its vector, numpy array of dtype float64
    """
    word_to_vec_map = {}

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which is not UTF-8 everywhere and then fails on non-ASCII words.
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            tokens = line.strip().split()
            if not tokens:
                # Blank line (e.g. trailing newline at end of file): nothing to parse.
                continue
            word_to_vec_map[tokens[0]] = np.array(tokens[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # Indices are assigned in sorted word order and start at 1, not 0.
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w

    return words_to_index, index_to_words, word_to_vec_map
26
27
def softmax(x):
    """Return the softmax of the scores in x, normalised over all of its elements."""
    # Shifting by the largest score keeps np.exp from overflowing; the result is unchanged.
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    total = exponentials.sum()
    return exponentials / total
31
32
33
def read_csv(filename='data/emojify_data.csv'):
    """
    Read phrases and their emoji labels from a CSV file.

    The first column of each row is taken as the phrase and the second as the
    integer label; any further columns are ignored. Blank lines are skipped.

    Arguments:
    filename -- path to the CSV file

    Returns:
    X -- numpy array containing the phrases
    Y -- numpy array containing the labels converted to int
    """
    phrases = []
    labels = []  # not called `emoji`, so the imported emoji module is not shadowed

    # The csv module expects files opened with newline='' so that newlines
    # embedded inside quoted fields are handled by the reader itself.
    with open(filename, newline='') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            phrases.append(row[0])
            labels.append(row[1])

    X = np.asarray(phrases)
    Y = np.asarray(labels, dtype=int)

    return X, Y
48
49
def convert_to_one_hot(Y, C):
    """
    Turn an array of class indices into one-hot rows.

    Arguments:
    Y -- numpy array of integer class indices; it is flattened before encoding
    C -- number of classes, i.e. the length of each one-hot row

    Returns:
    numpy array with one row per element of Y, each row taken from the C x C identity matrix
    """
    class_rows = np.eye(C)
    flat_labels = Y.reshape(-1)
    # Row k of the identity matrix is the one-hot encoding of class k.
    one_hot = class_rows[flat_labels]
    return one_hot
52
53
54
# Maps a label (as a string) to the text that label_to_emoji passes to emoji.emojize.
# Label "0" is spelled as a literal code point sequence rather than ":heart:",
# because :heart: prints a black instead of red heart depending on the font.
emoji_dictionary = {
    "0": "\u2764\uFE0F",
    "1": ":baseball:",
    "2": ":smile:",
    "3": ":disappointed:",
    "4": ":fork_and_knife:",
}
59
60
def label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji code (string) ready to be printed

    Raises KeyError if str(label) is not a key of emoji_dictionary.
    """
    # Look the label up outside the try block so an unknown label still raises KeyError.
    code = emoji_dictionary[str(label)]
    try:
        # emoji 2.0 removed the `use_aliases` argument; alias names such as
        # ":smile:" are selected with language='alias' instead.
        return emoji.emojize(code, language='alias')
    except (TypeError, KeyError):
        # NOTE(review): older emoji releases are expected to reject this call,
        # either because `language` is not a parameter (TypeError) or because
        # 'alias' is not a known language (KeyError) -- confirm against the
        # oldest release that must be supported. Those releases take use_aliases.
        return emoji.emojize(code, use_aliases=True)
65
66
67
def print_predictions(X, pred):
    """
    Print each sentence in X followed by the emoji for its predicted label.

    Arguments:
    X -- numpy array of sentences; its first dimension gives the number of rows printed
    pred -- predicted labels, indexable by row; each entry must hold exactly one value
    """
    print()
    for i in range(X.shape[0]):
        # pred[i] can be a one-element array (a row of an (m, 1) array). Calling
        # int() directly on such an array is deprecated by NumPy, so pull the
        # single value out explicitly before converting.
        label = int(np.asarray(pred[i]).item())
        print(X[i], label_to_emoji(label))
71
72
73
def plot_confusion_matrix(y_actu, y_pred, title='Confusion matrix', cmap=plt.cm.gray_r):
    """
    Draw the cross-tabulation of actual versus predicted labels with plt.matshow.

    Arguments:
    y_actu -- actual labels, used as the row factor of the cross-tabulation
    y_pred -- predicted labels, numpy array; reshaped to (y_pred.shape[0],) for the column factor
    title -- currently unused; kept so existing callers that pass it keep working
    cmap -- matplotlib colormap used to render the matrix

    The plotted table includes the margin row and column added by margins=True.
    """
    predicted = y_pred.reshape(y_pred.shape[0],)
    df_confusion = pd.crosstab(y_actu, predicted, rownames=['Actual'], colnames=['Predicted'], margins=True)

    plt.matshow(df_confusion, cmap=cmap)
    plt.colorbar()

    # Rows and columns get their own tick positions: the table is not square
    # when some label occurs only among the actual or only among the predicted values.
    x_ticks = np.arange(len(df_confusion.columns))
    y_ticks = np.arange(len(df_confusion.index))
    plt.xticks(x_ticks, df_confusion.columns, rotation=45)
    plt.yticks(y_ticks, df_confusion.index)

    plt.ylabel(df_confusion.index.name)
    plt.xlabel(df_confusion.columns.name)
88
89
90
def predict(X, Y, W, b, word_to_vec_map):
    """
    Given X (sentences) and Y (emoji indices), predict emojis and compute the accuracy of your model over the given set.

    Each sentence is lower-cased, split on whitespace and represented by the
    average of its known word vectors. Words missing from word_to_vec_map are
    ignored; a sentence with no known word is represented by the zero vector.

    Arguments:
    X -- input data containing sentences, numpy array of shape (m, None)
    Y -- labels, containing index of the label emoji, numpy array of shape (m, 1)
    W -- weight matrix applied to the averaged word vector; its last dimension is the word-vector size
    b -- bias added to np.dot(W, avg)
    word_to_vec_map -- dictionary mapping words to their vector representation

    Returns:
    pred -- numpy array of shape (m, 1) with your predictions
    """
    m = X.shape[0]
    pred = np.zeros((m, 1))

    # Take the word-vector size from W rather than hard-coding 50, so that
    # embeddings of any dimension work.
    vec_size = np.shape(W)[-1]

    for j in range(m):  # Loop over the examples

        # Split jth example (sentence) into list of lower case words
        words = X[j].lower().split()

        # Average the vectors of the words that have an embedding
        avg = np.zeros((vec_size,))
        known = 0
        for w in words:
            if w in word_to_vec_map:
                avg += word_to_vec_map[w]
                known += 1
        if known > 0:
            # Guarded so that an empty or fully unknown sentence does not divide by zero.
            avg = avg / known

        # Forward propagation
        Z = np.dot(W, avg) + b
        A = softmax(Z)
        pred[j] = np.argmax(A)

    # Y is reshaped to a column so it compares element-wise with pred.
    print("Accuracy: " + str(np.mean(pred == Y.reshape(Y.shape[0], 1))))

    return pred
123