GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 1/Jazz improvisation with LSTM/data_utils.py
from music_utils import *
from preprocess import *
from tensorflow.keras.utils import to_categorical

from collections import defaultdict
from mido import MidiFile
from pydub import AudioSegment
from pydub.generators import Sine
import math
import numpy as np

# Note: the music21 names used below (stream, note, tempo, midi) are expected to
# come in through the star imports from music_utils / preprocess above.

#chords, abstract_grammars = get_musical_data('data/original_metheny.mid')
#corpus, tones, tones_indices, indices_tones = get_corpus_data(abstract_grammars)
#N_tones = len(set(corpus))
n_a = 64                               # number of hidden units in the LSTM cell
x_initializer = np.zeros((1, 1, 90))   # one-hot seed for the first input value (90 possible values)
a_initializer = np.zeros((1, n_a))     # initial hidden state of the LSTM cell
c_initializer = np.zeros((1, n_a))     # initial cell state of the LSTM cell

def load_music_utils(file):
    chords, abstract_grammars = get_musical_data(file)
    corpus, tones, tones_indices, indices_tones = get_corpus_data(abstract_grammars)
    N_tones = len(set(corpus))
    X, Y, N_tones = data_processing(corpus, tones_indices, 60, 30)
    return (X, Y, N_tones, indices_tones, chords)

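# Example (illustrative; the commented-out lines near the top of this file use the same dataset):
# X, Y, N_tones, indices_tones, chords = load_music_utils('data/original_metheny.mid')
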
def generate_music(inference_model, indices_tones, chords, diversity = 0.5):
    """
    Generates music using a model trained to learn musical patterns of a jazz soloist. Creates an audio stream
    to save the music and play it.

    Arguments:
    inference_model -- Keras model instance, output of djmodel()
    indices_tones -- a python dictionary mapping indices into their corresponding unique tone (ex: A,0.250,< m2,P-4 >)
    chords -- chord data returned by load_music_utils()
    diversity -- scalar value, defines how conservative/creative the model is when generating music

    Returns:
    out_stream -- a stream object containing the generated notes and chords
    """

    # set up audio stream
    out_stream = stream.Stream()

    # Initialize chord variables
    curr_offset = 0.0                      # variable used to write sounds to the Stream
    num_chords = int(len(chords) / 3)      # number of different sets of chords

    print("Predicting new values for different sets of chords.")
    # Loop over all the sets of chords. At each iteration generate a sequence of tones
    # and use the current chords to convert it into actual sounds
    for i in range(1, num_chords):

        # Retrieve the current chords from the stream
        curr_chords = stream.Voice()

        # Loop over the chords of the current set of chords
        for j in chords[i]:
            # Add each chord to curr_chords with the adequate offset, no need to understand this
            curr_chords.insert((j.offset % 4), j)

        # Generate a sequence of tones using the model
        _, indices = predict_and_sample(inference_model)
        indices = list(indices.squeeze())
        pred = [indices_tones[p] for p in indices]

        predicted_tones = 'C,0.25 '
        for k in range(len(pred) - 1):
            predicted_tones += pred[k] + ' '

        predicted_tones += pred[-1]

        #### POST PROCESSING OF THE PREDICTED TONES ####
        # We will consider "A" and "X" as "C" tones. It is a common choice.
        predicted_tones = predicted_tones.replace(' A', ' C').replace(' X', ' C')

        # Pruning #1: smoothing measure
        predicted_tones = prune_grammar(predicted_tones)

        # Use predicted tones and current chords to generate sounds
        sounds = unparse_grammar(predicted_tones, curr_chords)

        # Pruning #2: removing repeated notes and notes that are too close together
        sounds = prune_notes(sounds)

        # Quality assurance: clean up sounds
        sounds = clean_up_notes(sounds)

        # Print number of tones/notes in sounds
        print('Generated %s sounds using the predicted values for the set of chords ("%s") and after pruning' % (len([k for k in sounds if isinstance(k, note.Note)]), i))

        # Insert sounds into the output stream
        for m in sounds:
            out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        curr_offset += 4.0

    # Initialize the tempo of the output stream with 130 beats per minute
    out_stream.insert(0.0, tempo.MetronomeMark(number=130))

    # Save the audio stream to a file
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open("output/my_music.midi", 'wb')
    mf.write()
    print("Your generated music is saved in output/my_music.midi")
    mf.close()

    # Play the final stream (uncomment the two lines below to play it in real time)
    # play = lambda x: midi.realtime.StreamPlayer(x).play()
    # play(out_stream)

    return out_stream


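# Typical usage (illustrative; `inference_model` is the trained inference model
# built in the accompanying notebook and is not defined in this file):
# out_stream = generate_music(inference_model, indices_tones, chords)
# where indices_tones and chords come from load_music_utils() above.
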
def predict_and_sample(inference_model, x_initializer = x_initializer, a_initializer = a_initializer,
                       c_initializer = c_initializer):
    """
    Predicts the next sequence of values using the inference model.

    Arguments:
    inference_model -- Keras model instance for inference time
    x_initializer -- numpy array of shape (1, 1, 90), one-hot vector initializing the values generation
    a_initializer -- numpy array of shape (1, n_a), initializing the hidden state of the LSTM_cell
    c_initializer -- numpy array of shape (1, n_a), initializing the cell state of the LSTM_cell

    Returns:
    results -- numpy-array of shape (Ty, 90), matrix of one-hot vectors representing the values generated
    indices -- numpy-array of shape (Ty, 1), matrix of indices representing the values generated
    """

    ### START CODE HERE ###
    # Step 1: use the inference model to predict an output sequence given x_initializer, a_initializer and c_initializer
    pred = inference_model.predict([x_initializer, a_initializer, c_initializer])
    # Step 2: convert "pred" into an array of indices with the maximum probabilities
    indices = np.argmax(pred, axis = -1)
    # Step 3: convert the indices to one-hot vectors of size 90
    results = to_categorical(indices, num_classes=90)
    ### END CODE HERE ###

    return results, indices


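# Note (illustrative): generate_music() above calls this helper with the default
# zero initializers, e.g.
#   _, indices = predict_and_sample(inference_model)
# then squeezes `indices` and maps each sampled index back to a tone string
# via the indices_tones dictionary.
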
def note_to_freq(note, concert_A=440.0):
    '''
    from wikipedia: http://en.wikipedia.org/wiki/MIDI_Tuning_Standard#Frequency_values
    '''
    return (2.0 ** ((note - 69) / 12.0)) * concert_A

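# Worked example (standard MIDI tuning): note 69 is concert A, so
# note_to_freq(69) == 440.0; middle C (note 60) is nine semitones lower,
# giving 440 * 2 ** ((60 - 69) / 12) ~= 261.63 Hz.
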
def ticks_to_ms(ticks, tempo, mid):
    # ms per tick = (ms per beat) / (ticks per beat), rounded up to a whole millisecond
    tick_ms = math.ceil((60000.0 / tempo) / mid.ticks_per_beat)
    return ticks * tick_ms

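# Worked example (illustrative, assuming a file with 480 ticks per beat):
# at tempo = 130 bpm a beat lasts 60000 / 130 ~= 461.5 ms, so each tick is
# rounded up to 1 ms by math.ceil and ticks_to_ms(480, 130, mid) returns 480,
# slightly longer than the true beat because of the per-tick rounding.
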
def mid2wav(file):
    mid = MidiFile(file)
    output = AudioSegment.silent(mid.length * 1000.0)

    tempo = 130  # bpm

    for track in mid.tracks:
        # position of rendering in ms
        current_pos = 0.0
        current_notes = defaultdict(dict)

        for msg in track:
            current_pos += ticks_to_ms(msg.time, tempo, mid)
            if msg.type == 'note_on':
                # remember when this note started so its duration is known at note_off
                if msg.note in current_notes[msg.channel]:
                    current_notes[msg.channel][msg.note].append((current_pos, msg))
                else:
                    current_notes[msg.channel][msg.note] = [(current_pos, msg)]

            if msg.type == 'note_off':
                start_pos, start_msg = current_notes[msg.channel][msg.note].pop()

                duration = math.ceil(current_pos - start_pos)
                # synthesize the note as a sine tone and overlay it at its start position
                signal_generator = Sine(note_to_freq(msg.note, 500))
                #print(duration)
                rendered = signal_generator.to_audio_segment(duration=duration-50, volume=-20).fade_out(100).fade_in(30)

                output = output.overlay(rendered, start_pos)

    output.export("./output/rendered.wav", format="wav")
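
# Example usage (a sketch): render the MIDI file written by generate_music()
# above to a WAV file under ./output/:
# mid2wav('output/my_music.midi')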