GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 1/Jazz improvisation with LSTM/preprocess.py

'''
Author: Ji-Sung Kim
Project: deepjazz
Purpose: Parse, clean up, and process data.

Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with
express permission.
'''

from __future__ import print_function

from music21 import *
from collections import defaultdict, OrderedDict
from itertools import groupby, zip_longest

from grammar import *

from grammar import parse_melody
from music_utils import *

#----------------------------HELPER FUNCTIONS----------------------------------#

''' Helper function to parse a MIDI file into its measures and chords. '''
def __parse_midi(data_fn):
    # Parse the MIDI data for separate melody and accompaniment parts.
    midi_data = converter.parse(data_fn)
    # Get the melody part and compress it into a single voice.
    melody_stream = midi_data[5]  # For the Metheny piece, the melody is Part #5.
    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
    for j in melody2:
        melody1.insert(j.offset, j)
    melody_voice = melody1

    # Give zero-duration (e.g., grace) notes a sixteenth-note duration so
    # every element occupies time when grouping by offset.
    for i in melody_voice:
        if i.quarterLength == 0.0:
            i.quarterLength = 0.25

    # Change the key signature to adhere to comp_stream (1 sharp, major mode).
    # Also add an electric guitar instrument.
    melody_voice.insert(0, instrument.ElectricGuitar())
    melody_voice.insert(0, key.KeySignature(sharps=1))

    # The accompaniment parts. Take only the best subset of parts from
    # the original data. Maybe add more parts, hand-add valid instruments.
    # Should at least add a string part (for sparse solos).
    # Verified good parts: 0, 1, 6, 7.
    partIndices = [0, 1, 6, 7]
    comp_stream = stream.Voice()
    comp_stream.append([j.flat for i, j in enumerate(midi_data)
                        if i in partIndices])

    # Full stream containing both the melody and the accompaniment.
    # All parts are flattened.
    full_stream = stream.Voice()
    for i in range(len(comp_stream)):
        full_stream.append(comp_stream[i])
    full_stream.append(melody_voice)

    # Extract the solo stream, assuming you know the offsets to pass to
    # getElementsByOffset(i, j). Note that for different instruments (with
    # stream.flat), you NEED to use stream.Part(), not stream.Voice().
    # The accompanied solo lies in the offset range [476, 548].
    solo_stream = stream.Voice()
    for part in full_stream:
        curr_part = stream.Part()
        curr_part.append(part.getElementsByClass(instrument.Instrument))
        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
        curr_part.append(part.getElementsByClass(key.KeySignature))
        curr_part.append(part.getElementsByClass(meter.TimeSignature))
        curr_part.append(part.getElementsByOffset(476, 548,
                                                  includeEndBoundary=True))
        cp = curr_part.flat
        solo_stream.insert(cp)

    # Group by measure so you can classify.
    # Note that measure 0 is for the time signature, metronome marking, etc.,
    # which all have an offset of 0.0.
    melody_stream = solo_stream[-1]
    measures = OrderedDict()
    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]
    measureNum = 0  # for now, don't use the real measure numbers (119, 120)
    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
        measures[measureNum] = [n[1] for n in group]
        measureNum += 1
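
    # Illustrative sketch of the grouping above (values hypothetical): with
    # four quarter-note beats per measure, a note at offset 9.5 maps to
    # measure index int(9.5 / 4) == 2. offsetTuples follows the stream's
    # offset ordering, so groupby emits one contiguous group per measure.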

    # Get the stream of chords.
    # offsetTuples_chords: group the chords by measure number.
    chordStream = solo_stream[0]
    chordStream.removeByClass(note.Rest)
    chordStream.removeByClass(note.Note)
    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]

    # Generate the chord structure. Use just track 1 (piano), since it is
    # the only instrument that has chords.
    # Group into fours, just like before.
    chords = OrderedDict()
    measureNum = 0
    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
        chords[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Fix for the problem below.
    # 1) Find out why len(measures) != len(chords).
    # ANSWER: the piece resolves at the end, but the melody ends 1/16 before
    # the last measure, so it doesn't actually show up, while the
    # accompaniment's beat 1 right after it does. On second thought: the
    # melody and accompaniment start on Ab and resolve to the same key (Ab),
    # so we could just cut out the last measure to loop.
    # Decided: just cut out the last measure.
    del chords[len(chords) - 1]
    assert len(chords) == len(measures)

    # measures and chords are now parallel OrderedDicts keyed 0..N-1; each
    # value lists the music21 objects falling in that four-beat window.
    return measures, chords

''' Helper function to get the grammatical data from given musical data. '''
def __get_abstract_grammars(measures, chords):
    # Extract the grammars, skipping measure 0 (time signature, metronome, etc.).
    abstract_grammars = []
    for ix in range(1, len(measures)):
        m = stream.Voice()
        for i in measures[ix]:
            m.insert(i.offset, i)
        c = stream.Voice()
        for j in chords[ix]:
            c.insert(j.offset, j)
        parsed = parse_melody(m, c)
        abstract_grammars.append(parsed)

    return abstract_grammars
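
# Each element of abstract_grammars is a space-separated grammar string
# produced by parse_melody; in deepjazz these tokens look roughly like
# 'C,0.250' or 'S,0.500<m-2,P-4>' (note category, duration, optional interval
# info) -- see grammar.py for the authoritative encoding.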

#----------------------------PUBLIC FUNCTIONS----------------------------------#

''' Get musical data from a MIDI file. '''
def get_musical_data(data_fn):
    measures, chords = __parse_midi(data_fn)
    abstract_grammars = __get_abstract_grammars(measures, chords)

    return chords, abstract_grammars
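
# Note the off-by-one pairing downstream: abstract_grammars[k] was parsed from
# measures[k + 1] and chords[k + 1], since __get_abstract_grammars skips
# measure 0.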

''' Get corpus data from grammatical data. '''
def get_corpus_data(abstract_grammars):
    corpus = [x for sublist in abstract_grammars for x in sublist.split(' ')]
    values = set(corpus)
    val_indices = dict((v, i) for i, v in enumerate(values))
    indices_val = dict((i, v) for i, v in enumerate(values))

    return corpus, values, val_indices, indices_val
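
# Illustrative example (hypothetical tokens): if values == {'C,0.250', 'R,0.500'},
# one possible mapping is val_indices == {'C,0.250': 0, 'R,0.500': 1} with
# indices_val == {0: 'C,0.250', 1: 'R,0.500'} as its inverse; set iteration
# order is arbitrary, so the exact indices can differ between runs.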

'''
def load_music_utils():
    chord_data, raw_music_data = get_musical_data('data/original_metheny.mid')
    music_data, values, values_indices, indices_values = get_corpus_data(raw_music_data)

    X, Y = data_processing(music_data, values_indices, Tx = 20, step = 3)
    return (X, Y)
'''
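
# A minimal smoke test, not part of the original module: it assumes the
# deepjazz data layout where the Metheny MIDI file lives at
# 'data/original_metheny.mid' (the same path used by the commented-out
# load_music_utils above); adjust the path to your checkout.
if __name__ == '__main__':
    chords, grammars = get_musical_data('data/original_metheny.mid')
    corpus, values, val_indices, indices_val = get_corpus_data(grammars)
    print('Chord measures:', len(chords))
    print('Grammar sequences:', len(grammars))
    print('Corpus tokens:', len(corpus), '| unique values:', len(values))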