GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 1/Jazz improvisation with LSTM/preprocess.py

'''
Author: Ji-Sung Kim
Project: deepjazz
Purpose: Parse, clean up, and process data.

Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with
express permission.
'''

from __future__ import print_function

from music21 import *
from collections import defaultdict, OrderedDict
from itertools import groupby, zip_longest

from grammar import *

from grammar import parse_melody
from music_utils import *

#----------------------------HELPER FUNCTIONS----------------------------------#

''' Helper function to parse a MIDI file into its measures and chords. '''
def __parse_midi(data_fn):
    # Parse the MIDI data for separate melody and accompaniment parts.
    midi_data = converter.parse(data_fn)
    # Get the melody part and compress it into a single voice.
    melody_stream = midi_data[5]  # For the Metheny piece, the melody is Part #5.
    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
    for j in melody2:
        melody1.insert(j.offset, j)
    melody_voice = melody1

    # Give zero-duration (e.g., grace) notes a sixteenth-note duration so
    # every element occupies time when grouping by offset.
    for i in melody_voice:
        if i.quarterLength == 0.0:
            i.quarterLength = 0.25

    # Change the key signature to adhere to comp_stream (1 sharp, major mode).
    # Also add an electric guitar instrument.
    melody_voice.insert(0, instrument.ElectricGuitar())
    melody_voice.insert(0, key.KeySignature(sharps=1))

    # The accompaniment parts. Take only the best subset of parts from
    # the original data. Maybe add more parts, hand-add valid instruments.
    # Should at least add a string part (for sparse solos).
    # Verified good parts: 0, 1, 6, 7.
    partIndices = [0, 1, 6, 7]
    comp_stream = stream.Voice()
    comp_stream.append([j.flat for i, j in enumerate(midi_data)
                        if i in partIndices])

    # Full stream containing both the melody and the accompaniment.
    # All parts are flattened.
    full_stream = stream.Voice()
    for i in range(len(comp_stream)):
        full_stream.append(comp_stream[i])
    full_stream.append(melody_voice)

    # Extract the solo stream, assuming you know the offsets to pass to
    # getElementsByOffset(i, j). Note that for different instruments (with
    # stream.flat), you NEED to use stream.Part(), not stream.Voice().
    # The accompanied solo lies in the offset range [476, 548].
    solo_stream = stream.Voice()
    for part in full_stream:
        curr_part = stream.Part()
        curr_part.append(part.getElementsByClass(instrument.Instrument))
        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
        curr_part.append(part.getElementsByClass(key.KeySignature))
        curr_part.append(part.getElementsByClass(meter.TimeSignature))
        curr_part.append(part.getElementsByOffset(476, 548,
                                                  includeEndBoundary=True))
        cp = curr_part.flat
        solo_stream.insert(cp)

    # Group by measure so you can classify.
    # Note that measure 0 is for the time signature, metronome marking, etc.,
    # which all have an offset of 0.0.
    melody_stream = solo_stream[-1]
    measures = OrderedDict()
    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]
    measureNum = 0  # for now, don't use the real measure numbers (119, 120)
    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
        measures[measureNum] = [n[1] for n in group]
        measureNum += 1
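
    # Illustrative sketch of the grouping above (values hypothetical): with
    # four quarter-note beats per measure, a note at offset 9.5 maps to
    # measure index int(9.5 / 4) == 2. offsetTuples follows the stream's
    # offset ordering, so groupby emits one contiguous group per measure.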

    # Get the stream of chords.
    # offsetTuples_chords: group the chords by measure number.
    chordStream = solo_stream[0]
    chordStream.removeByClass(note.Rest)
    chordStream.removeByClass(note.Note)
    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]

    # Generate the chord structure. Use just track 1 (piano), since it is
    # the only instrument that has chords.
    # Group into fours, just like before.
    chords = OrderedDict()
    measureNum = 0
    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
        chords[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Fix for the problem below.
    # 1) Find out why len(measures) != len(chords).
    # ANSWER: the piece resolves at the end, but the melody ends 1/16 before
    # the last measure, so it doesn't actually show up, while the
    # accompaniment's beat 1 right after it does. On second thought: the
    # melody and accompaniment start on Ab and resolve to the same key (Ab),
    # so we could just cut out the last measure to loop.
    # Decided: just cut out the last measure.
    del chords[len(chords) - 1]
    assert len(chords) == len(measures)

    # measures and chords are now parallel OrderedDicts keyed 0..N-1; each
    # value lists the music21 objects falling in that four-beat window.
    return measures, chords

''' Helper function to get the grammatical data from given musical data. '''
def __get_abstract_grammars(measures, chords):
    # Extract the grammars, skipping measure 0 (time signature, metronome, etc.).
    abstract_grammars = []
    for ix in range(1, len(measures)):
        m = stream.Voice()
        for i in measures[ix]:
            m.insert(i.offset, i)
        c = stream.Voice()
        for j in chords[ix]:
            c.insert(j.offset, j)
        parsed = parse_melody(m, c)
        abstract_grammars.append(parsed)

    return abstract_grammars
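
# Each element of abstract_grammars is a space-separated grammar string
# produced by parse_melody; in deepjazz these tokens look roughly like
# 'C,0.250' or 'S,0.500<m-2,P-4>' (note category, duration, optional interval
# info) -- see grammar.py for the authoritative encoding.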

#----------------------------PUBLIC FUNCTIONS----------------------------------#

''' Get musical data from a MIDI file. '''
def get_musical_data(data_fn):
    measures, chords = __parse_midi(data_fn)
    abstract_grammars = __get_abstract_grammars(measures, chords)

    return chords, abstract_grammars
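
# Note the off-by-one pairing downstream: abstract_grammars[k] was parsed from
# measures[k + 1] and chords[k + 1], since __get_abstract_grammars skips
# measure 0.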

''' Get corpus data from grammatical data. '''
def get_corpus_data(abstract_grammars):
    corpus = [x for sublist in abstract_grammars for x in sublist.split(' ')]
    values = set(corpus)
    val_indices = dict((v, i) for i, v in enumerate(values))
    indices_val = dict((i, v) for i, v in enumerate(values))

    return corpus, values, val_indices, indices_val
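
# Illustrative example (hypothetical tokens): if values == {'C,0.250', 'R,0.500'},
# one possible mapping is val_indices == {'C,0.250': 0, 'R,0.500': 1} with
# indices_val == {0: 'C,0.250', 1: 'R,0.500'} as its inverse; set iteration
# order is arbitrary, so the exact indices can differ between runs.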

'''
def load_music_utils():
    chord_data, raw_music_data = get_musical_data('data/original_metheny.mid')
    music_data, values, values_indices, indices_values = get_corpus_data(raw_music_data)

    X, Y = data_processing(music_data, values_indices, Tx = 20, step = 3)
    return (X, Y)
'''
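
# A minimal smoke test, not part of the original module: it assumes the
# deepjazz data layout where the Metheny MIDI file lives at
# 'data/original_metheny.mid' (the same path used by the commented-out
# load_music_utils above); adjust the path to your checkout.
if __name__ == '__main__':
    chords, grammars = get_musical_data('data/original_metheny.mid')
    corpus, values, val_indices, indices_val = get_corpus_data(grammars)
    print('Chord measures:', len(chords))
    print('Grammar sequences:', len(grammars))
    print('Corpus tokens:', len(corpus), '| unique values:', len(values))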