CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
amanchadha

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 1/Dinosaur Island -- Character-level language model/shakespeare_utils.py
Views: 4819
1
# Load Packages
2
from __future__ import print_function
3
from tensorflow.keras.callbacks import LambdaCallback
4
from tensorflow.keras.models import Model, load_model, Sequential
5
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking
6
from tensorflow.keras.layers import LSTM
7
from tensorflow.keras.utils import get_file
8
from tensorflow.keras.preprocessing.sequence import pad_sequences
9
import numpy as np
10
import random
11
import sys
12
import io
13
14
def build_data(text, Tx = 40, stride = 3):
    """
    Build a character-level training set by sliding a window over the corpus.

    Arguments:
    text -- string, corpus of Shakespearian poem
    Tx -- sequence length, number of time-steps (or characters) in one training example
    stride -- how much the window shifts itself while scanning

    Returns:
    X -- list of training examples (strings of length Tx)
    Y -- list of training labels (the single character following each window)
    """
    # Every window start position; the last window must leave one character
    # after it to serve as the label.
    starts = range(0, len(text) - Tx, stride)

    ### START CODE HERE ### (≈ 3 lines)
    X = [text[s: s + Tx] for s in starts]
    Y = [text[s + Tx] for s in starts]
    ### END CODE HERE ###

    print('number of training examples:', len(X))

    return X, Y
41
42
def vectorization(X, Y, n_x, char_indices, Tx = 40):
43
"""
44
Convert X and Y (lists) into arrays to be given to a recurrent neural network.
45
46
Arguments:
47
X --
48
Y --
49
Tx -- integer, sequence length
50
51
Returns:
52
x -- array of shape (m, Tx, len(chars))
53
y -- array of shape (m, len(chars))
54
"""
55
56
m = len(X)
57
x = np.zeros((m, Tx, n_x), dtype=np.bool)
58
y = np.zeros((m, n_x), dtype=np.bool)
59
for i, sentence in enumerate(X):
60
for t, char in enumerate(sentence):
61
x[i, t, char_indices[char]] = 1
62
y[i, char_indices[Y[i]]] = 1
63
64
return x, y
65
66
67
def sample(preds, temperature=1.0):
    """
    Sample an index from a probability array, reweighted by temperature.

    Arguments:
    preds -- 1-D array-like of probabilities over the vocabulary
    temperature -- softmax temperature; < 1 sharpens the distribution,
                   > 1 flattens it toward uniform

    Returns:
    out -- integer index drawn from the temperature-adjusted distribution
    """
    preds = np.asarray(preds).astype('float64')
    # Rescale log-probabilities by the temperature, then renormalize
    # (zero probabilities map to -inf -> exp -> 0, which is correct).
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    # A single multinomial draw yields a one-hot vector; its argmax IS the
    # sampled index. BUGFIX: the original passed the one-hot vector to a
    # second np.random.choice over the module-level global `chars`, which
    # was redundant and coupled this helper to module state.
    probas = np.random.multinomial(1, preds, 1)
    out = int(np.argmax(probas))
    return out
77
78
def on_epoch_end(epoch, logs):
    """
    Keras LambdaCallback hook invoked at the end of each training epoch.

    Intentionally a no-op: the per-epoch text-generation code that once
    lived here is disabled, and generation is instead performed on demand
    by generate_output().

    Arguments:
    epoch -- integer, index of the epoch that just finished
    logs -- dict of metric values for the finished epoch (unused)
    """
    pass
117
print("Loading text data...")
118
text = io.open('shakespeare.txt', encoding='utf-8').read().lower()
119
#print('corpus length:', len(text))
120
121
Tx = 40
122
chars = sorted(list(set(text)))
123
char_indices = dict((c, i) for i, c in enumerate(chars))
124
indices_char = dict((i, c) for i, c in enumerate(chars))
125
#print('number of unique characters in the corpus:', len(chars))
126
127
print("Creating training set...")
128
X, Y = build_data(text, Tx, stride = 3)
129
print("Vectorizing training set...")
130
x, y = vectorization(X, Y, n_x = len(chars), char_indices = char_indices)
131
print("Loading model...")
132
model = load_model('models/model_shakespeare_kiank_350_epoch.h5')
133
134
135
def generate_output():
    """
    Prompt the user for a seed string and print a 400-character completion
    generated by the pre-trained character-level model.

    Reads from stdin via input() and streams the generated characters to
    stdout. Relies on module-level globals: Tx, chars, char_indices,
    indices_char and model.
    """
    generated = ''
    #sentence = text[start_index: start_index + Tx]
    #sentence = '0'*Tx
    usr_input = input("Write the beginning of your poem, the Shakespeare machine will complete it. Your input is: ")
    # zero pad the sentence to Tx characters.
    sentence = ('{0:0>' + str(Tx) + '}').format(usr_input).lower()
    generated += usr_input

    sys.stdout.write("\n\nHere is your poem: \n\n")
    sys.stdout.write(usr_input)
    for i in range(400):
        # One-hot encode the current window; '0' pad positions are left
        # all-zero (the Masking-style convention used at training time).
        x_pred = np.zeros((1, Tx, len(chars)))

        for t, char in enumerate(sentence):
            if char != '0':
                x_pred[0, t, char_indices[char]] = 1.

        # Predict the next-character distribution and sample from it.
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature = 1.0)
        next_char = indices_char[next_index]

        # Slide the input window one character forward.
        generated += next_char
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()

        # Newlines do not terminate generation; continue sampling.
        if next_char == '\n':
            continue
166