#!/usr/bin/env python
import json
import math
import os

import click
import librosa
import numpy as np
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
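
# Example invocation (hypothetical paths; the dataset directory is expected to
# contain one sub-folder per genre, e.g. dataset/blues, dataset/rock, ...):
#   ./audio_model --dataset_dir dataset --epochs 80 --model_name genre_model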
@click.command()
@click.option("--dataset_dir", help="Dataset directory")
@click.option("--sample_rate", default=44100, help="Sample rate of the audio files; all files must share it")
@click.option("--min_track_duration", default=10, help="Duration of the shortest track, in seconds; every track is effectively cut to this length")
@click.option("--min_array_length", default=1000000, help="Smallest signal array length; every signal is truncated to it")
@click.option("--from_json", default=False, type=bool, help="Load data from a json file previously saved by this script, to save time")
@click.option("--json_save", default=False, type=bool, help="Save the generated training data to a json file to speed up later trainings")
@click.option("--output_json", default="data.json", help="Where to output the json")
@click.option("--json_path", default="data.json", help="Where to look for the json data if from_json is true")
@click.option("--model_name", default="model", help="Desired name for the model, 'model' if none given")
@click.option("--test_size", default=0.3, help="Desired validation split size, 0.3 if none given")
@click.option("--epochs", default=50, help="Number of epochs to train the model for")
@click.option("--activation", default="relu", help="Activation function to use")
@click.option("--dropout", default=True, type=bool, help="Use dropout layers in the network?")
@click.option("--num_segments", default=5, type=int, help="Number of segments to divide each track into")
def create_model(dataset_dir, sample_rate, min_track_duration, min_array_length, from_json, json_save, output_json, json_path, model_name, test_size, epochs, activation, dropout, num_segments, num_mfcc=13, n_fft=2048, hop_length=512):
    """Extracts MFCCs from a music dataset (or loads them from a json file),
    optionally saves them to json along with genre labels, trains a
    genre-classification network on them, and saves the trained model.

    :param dataset_dir (str): Path to dataset
    :param num_mfcc (int): Number of coefficients to extract
    :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
    :param hop_length (int): Sliding window for FFT. Measured in # of samples
    :param num_segments (int): Number of segments we want to divide sample tracks into
    """
    if not from_json:
        mapping = []
        labels = []
        mfcc_data = []
        samples_per_segment = int(sample_rate * min_track_duration / num_segments)
        num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
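        # Worked example with the defaults: 44100 Hz * 10 s / 5 segments
        # = 88200 samples per segment, and ceil(88200 / 512) = 173 MFCC
        # vectors per segment.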

        # Loop through all genre sub-folders
        for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_dir)):

            # Ensure we're processing a genre sub-folder level
            if dirpath != dataset_dir:

                # Save genre label (i.e., sub-folder name) in the mapping
                semantic_label = os.path.basename(dirpath)
                mapping.append(semantic_label)
                print("\nProcessing: {}".format(semantic_label))

                # Process all audio files in the genre sub-dir
                for f in filenames:

                    # Load audio file, resampling to the requested rate
                    file_path = os.path.join(dirpath, f)
                    signal, sample_rate = librosa.load(file_path, sr=sample_rate)
                    # Truncate every signal to the common minimum length
                    signal = signal[:min_array_length]

                    # Process all segments of the audio file
                    for d in range(num_segments):

                        # Calculate start and finish sample for the current segment
                        start = samples_per_segment * d
                        finish = start + samples_per_segment

                        # Extract MFCCs (librosa >= 0.10 requires keyword arguments)
                        mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                        mfcc = mfcc.T

                        # Store only MFCC features with the expected number of vectors
                        if len(mfcc) == num_mfcc_vectors_per_segment:
                            mfcc_data.append(mfcc.tolist())
                            # os.walk yields dataset_dir itself first (i == 0),
                            # so genre labels start at i - 1 == 0
                            labels.append(i - 1)
                            print("{}, segment:{}".format(file_path, d + 1))

        if json_save:
            data = {
                "mapping": mapping,
                "labels": labels,
                "mfcc": mfcc_data
            }
            # Save MFCCs to the json file named by --output_json
            with open(output_json, "w") as fp:
                json.dump(data, fp, indent=4)

    else:
        # Load the training dataset (inputs and targets) from a json file
        # saved by a previous run of this script
        with open(json_path, "r") as fp:
            data = json.load(fp)
        mfcc_data = data["mfcc"]
        labels = data["labels"]

    # Hold out roughly the first 10% of samples as the test set
    n_test = len(mfcc_data) // 10
    mfcc_test = mfcc_data[:n_test]
    labels_test = labels[:n_test]
    mfcc_data = mfcc_data[n_test:]
    labels = labels[n_test:]

    # Convert lists to numpy arrays
    x = np.array(mfcc_data)
    y = np.array(labels)
    x_test = np.array(mfcc_test)
    y_test = np.array(labels_test)
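
    # Each sample is a (num_mfcc_vectors_per_segment, num_mfcc) matrix, so
    # x has shape (num_samples, num_mfcc_vectors_per_segment, num_mfcc);
    # with the defaults that is (num_samples, 173, 13).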
print("Data succesfully loaded!")
# Create train/val split
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=test_size)

    if not dropout:
        # Build network topology
        model = keras.Sequential([
            # Input layer: flattens each (num_vectors, num_mfcc) sample
            keras.layers.Flatten(input_shape=(x.shape[1], x.shape[2])),
            # 1st dense layer
            keras.layers.Dense(512, activation=activation),
            # 2nd dense layer
            keras.layers.Dense(256, activation=activation),
            # 3rd dense layer
            keras.layers.Dense(64, activation=activation),
            # Output layer (assumes 10 genre classes)
            keras.layers.Dense(10, activation='softmax')
        ])
    else:
        model = keras.Sequential([
            # Input layer: flattens each (num_vectors, num_mfcc) sample
            keras.layers.Flatten(input_shape=(x.shape[1], x.shape[2])),
            # 1st dense layer
            keras.layers.Dense(512, activation=activation),
            # 1st dropout layer
            keras.layers.Dropout(0.2),
            # 2nd dense layer
            keras.layers.Dense(128, activation=activation),
            # 2nd dropout layer
            keras.layers.Dropout(0.2),
            # 3rd dense layer
            keras.layers.Dense(64, activation=activation),
            # 3rd dropout layer
            keras.layers.Dropout(0.2),
            # Output layer (assumes 10 genre classes)
            keras.layers.Dense(10, activation='softmax')
        ])
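
    # The integer labels produced above (0..9) pair with the
    # sparse_categorical_crossentropy loss below, which expects class indices
    # rather than one-hot vectors.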

    # Compile the model
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # Train the model
    history = model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=32, epochs=epochs)

    # Evaluate the model on the held-out test set
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print('\nTest Loss: {}'.format(test_loss))
    print('\nTest Accuracy: {}'.format(test_acc))

    # Training history from .fit(), e.g. loss and accuracy per epoch
    history_dict = history.history
    print("Recorded metrics: {}".format(list(history_dict.keys())))

    # Save the trained model under model_name
    model.save(model_name)


if __name__ == "__main__":
    create_model()
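
# The saved model can be reloaded later for inference, e.g. (hypothetical
# variable names; mfcc_batch must have shape (n, num_vectors, num_mfcc)):
#   model = keras.models.load_model("model")
#   predictions = model.predict(mfcc_batch)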