CoCalc -- emo_utils.py

GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 2/Emojify/emo_utils.py
5169 views
1
import csv
2
import numpy as np
3
import emoji
4
import pandas as pd
5
import matplotlib.pyplot as plt
6
from sklearn.metrics import confusion_matrix
7

8
def read_glove_vecs(glove_file):
    """
    Load word embeddings from a whitespace-separated text file.

    Every non-empty line is split on whitespace: the first field is the word,
    the remaining fields are the components of its vector.

    Arguments:
    glove_file -- path of the embeddings text file

    Returns:
    words_to_index -- dict mapping each word to its 1-based position in sorted order
    index_to_words -- dict mapping each 1-based position back to its word
    word_to_vec_map -- dict mapping each word to its vector (numpy array, dtype float64)
    """
    word_to_vec_map = {}

    # Decode explicitly instead of relying on the platform's default encoding,
    # which differs between systems (assumes the file is UTF-8 -- adjust if not).
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no word.
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    # Indices start at 1 and follow the sorted order of the vocabulary.
    words_to_index = {}
    index_to_words = {}
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w

    return words_to_index, index_to_words, word_to_vec_map
26

27
def softmax(x, axis=None):
    """
    Compute softmax values for the scores in x.

    Arguments:
    x -- array-like of scores
    axis -- axis along which to normalise; None (the default) treats the whole
            array as a single set of scores

    Returns:
    array with the same shape as x whose entries sum to 1 along `axis`
    (or over the whole array when axis is None)
    """
    x = np.asarray(x)

    if axis is None:
        # Subtracting the maximum keeps np.exp from overflowing and does not
        # change the result.
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    # keepdims=True keeps the reduced axis so the result broadcasts against x.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)
31

32

33
def read_csv(filename = 'data/emojify_data.csv'):
    """
    Read a two-column CSV file of (phrase, label) rows.

    Arguments:
    filename -- path of the CSV file; column 0 holds the phrase, column 1 the label

    Returns:
    X -- numpy array of the phrases (strings)
    Y -- numpy array of the labels converted to int
    """
    phrases = []
    labels = []

    # newline='' is what the csv module asks for so that it can handle line
    # endings (and newlines inside quoted fields) itself.
    # NOTE(review): assumes the file is UTF-8 -- confirm for your data.
    with open(filename, newline='', encoding='utf-8') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            phrases.append(row[0])
            labels.append(row[1])

    X = np.asarray(phrases)
    Y = np.asarray(labels, dtype=int)

    return X, Y
48

49
def convert_to_one_hot(Y, C):
    """
    Convert integer class labels into one-hot rows.

    Arguments:
    Y -- array-like of integer labels in [0, C); it is flattened first, so any shape is accepted
    C -- number of classes (length of each one-hot row)

    Returns:
    numpy array of shape (number of labels, C) with a single 1.0 per row
    """
    # np.asarray lets plain lists through as well as numpy arrays.
    labels = np.asarray(Y).reshape(-1)
    # Row k of the identity matrix is the one-hot encoding of label k.
    return np.eye(C)[labels]
52

53

54
# Maps a class label (as a string) to the text handed to emoji.emojize:
# either an emoji alias such as ":baseball:" or a literal emoji character.
emoji_dictionary = {
    # Literal red heart (U+2764 followed by a variation selector) instead of an
    # alias: the alias was noted to print a black heart depending on the font.
    "0": "\u2764\ufe0f",
    "1": ":baseball:",
    "2": ":smile:",
    "3": ":disappointed:",
    "4": ":fork_and_knife:",
}
60

61
def label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji code (string) ready to be printed

    Arguments:
    label -- class label; str(label) must be a key of emoji_dictionary

    Returns:
    string produced by emoji.emojize for the dictionary entry of that label

    Raises:
    KeyError -- if str(label) is not a key of emoji_dictionary
    """
    code = emoji_dictionary[str(label)]
    try:
        # Keyword understood by older releases of the emoji package.
        return emoji.emojize(code, use_aliases=True)
    except TypeError:
        # Newer releases (emoji >= 2.0) dropped `use_aliases`; aliases are
        # selected with language='alias' instead.
        return emoji.emojize(code, language='alias')
66
              
67
    
68
def print_predictions(X, pred):
    """
    Print every sentence of X next to the emoji of its predicted label.

    Arguments:
    X -- numpy array of sentences; X.shape[0] entries are printed
    pred -- predicted labels, indexable by the same positions as X; each
            pred[i] must hold exactly one number (scalar or one-element array)
    """
    print()
    for i in range(X.shape[0]):
        # pred[i] may be a one-element array (a row of an (m, 1) array).
        # .item() extracts the scalar explicitly; calling int() directly on an
        # array with ndim > 0 is deprecated in recent NumPy releases.
        label = int(np.asarray(pred[i]).item())
        print(X[i], label_to_emoji(label))
72
        
73
        
74
def plot_confusion_matrix(y_actu, y_pred, title='Confusion matrix', cmap=plt.cm.gray_r):
    """
    Draw the cross-tabulation of actual versus predicted labels as a matrix image.

    The table is built with pd.crosstab(..., margins=True), so it includes the
    row/column totals added by pandas.

    Arguments:
    y_actu -- actual labels, passed to pd.crosstab as the row variable
    y_pred -- predicted labels, numpy array reshaped to (y_pred.shape[0],) before use
    title -- currently unused (the plt.title call is disabled); kept so existing callers keep working
    cmap -- colormap passed to plt.matshow
    """
    df_confusion = pd.crosstab(y_actu, y_pred.reshape(y_pred.shape[0],), rownames=['Actual'], colnames=['Predicted'], margins=True)

    plt.matshow(df_confusion, cmap=cmap)
    plt.colorbar()

    # Rows and columns get their own tick positions: the table is not square
    # when some label appears only among the actual or only among the
    # predicted values, and tick positions must match the number of labels.
    col_ticks = np.arange(len(df_confusion.columns))
    row_ticks = np.arange(len(df_confusion.index))
    plt.xticks(col_ticks, df_confusion.columns, rotation=45)
    plt.yticks(row_ticks, df_confusion.index)

    plt.ylabel(df_confusion.index.name)
    plt.xlabel(df_confusion.columns.name)
89
    
90
    
91
    
92
def predict(X, Y, W, b, word_to_vec_map):
    """
    Given X (sentences) and Y (emoji indices), predict emojis and compute the accuracy of your model over the given set.

    Each sentence is lower-cased, split on whitespace and represented by the
    average of the vectors of its known words (a zero vector if none is known).
    The prediction is the argmax of softmax(np.dot(W, avg) + b). The accuracy
    is printed, not returned.

    Arguments:
    X -- input data containing sentences, numpy array of m strings
    Y -- labels, containing index of the label emoji, numpy array reshaped to (m, 1) for comparison
    W -- weight matrix multiplied with the averaged word vector
    b -- bias added to np.dot(W, avg)
    word_to_vec_map -- dict mapping words to their vector (1-D numpy arrays of equal length)

    Returns:
    pred -- numpy array of shape (m, 1) with your predictions

    Raises:
    IndexError -- if word_to_vec_map is empty
    """
    m = X.shape[0]
    pred = np.zeros((m, 1))

    # Dimensionality of the word vectors, read from an arbitrary entry without
    # copying every key of the (potentially large) map into a list.
    try:
        sample_vec = next(iter(word_to_vec_map.values()))
    except StopIteration:
        raise IndexError("word_to_vec_map is empty") from None
    n_h = sample_vec.shape[0]

    for j in range(m):                       # Loop over the examples

        # Split jth example (sentence) into list of lower case words
        words = X[j].lower().split()

        # Average the vectors of the words present in the map
        avg = np.zeros((n_h,))
        count = 0
        for w in words:
            if w in word_to_vec_map:
                avg += word_to_vec_map[w]
                count += 1

        # Leave the zero vector untouched when no word was recognised,
        # which also avoids a division by zero.
        if count > 0:
            avg = avg / count

        # Forward propagation
        Z = np.dot(W, avg) + b
        A = softmax(Z)
        pred[j] = np.argmax(A)

    print("Accuracy: "  + str(np.mean((pred[:] == Y.reshape(Y.shape[0],1)[:]))))

    return pred
133
Product

Resources

Company