pitch_detection.py
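"""Predict piano pitches in a WAV recording.

Detect note onsets, compute FFT features around each onset and classify them
with a pre-trained Keras model (model/piano_model.h5). The modules
`onset_detection` and `wav` are assumed to be project-local helpers.
"""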
import math
import os.path
import sys

import matplotlib.pyplot as plt
import numpy as np
from keras.models import load_model

import onset_detection
import wav


def extract_notes(pcm_data, onset_times, samplerate):
    # Build one feature vector per detected onset: a 100 ms slice of audio is
    # analysed at three FFT resolutions (32, 256 and 1024 frequency bins) and
    # the resulting spectra are concatenated.
    time = 0.1
    numfreq = 1024
    overlap = 0
    notes = []
    for onset in onset_times:
        pcm_slice = pcm_data[onset:onset + math.floor(time * samplerate)]
        fft1 = wav.compute_fft(pcm_slice, 0, time, samplerate, 32, 0)
        fft2 = wav.compute_fft(pcm_slice, 0, time, samplerate, 256, 0)
        fft = wav.compute_fft(pcm_slice, 0, time, samplerate, numfreq, overlap)
        X = np.concatenate((fft1.flatten(), fft2.flatten(), fft.flatten()))
        notes.append(X)
    return np.array(notes)


def load_wav(wav_file):
    # Load the audio file and detect note onsets in it.
    pcm_data, samplerate = wav.read_wav(wav_file, 400)
    sums, onset_times = onset_detection.detect_onset(pcm_data, samplerate, False, True)
    return pcm_data, onset_times, samplerate, sums


def evaluate_model(filename):
    # Apply the trained model to an audio file and predict the pitches at each onset.
    if os.path.isfile('model/piano_model.h5'):
        print("load model")
        model = load_model('model/piano_model.h5')
    else:
        print("no model available")
        return
    pcm_data, onset_times, samplerate, sums = load_wav(filename)
    notes = extract_notes(pcm_data, onset_times, samplerate)
    model.compile(loss='mean_squared_error', optimizer='adamax')
    # Derive the class labels from the training file names by stripping the
    # directory and the file extension.
    trainingfiles = wav.get_training_filenames()
    for i in range(len(trainingfiles)):
        trainingfiles[i] = trainingfiles[i].split("/")[1].split(".")[0]
    pred = model.predict(notes)
    predicted_notes = []
    for i in range(len(pred)):
        best_guess = 0
        num = 0  # index of the single most likely class (currently unused)
        nums = []  # indices of all classes above the detection threshold
        for e, f in enumerate(pred[i]):
            if best_guess < f:
                best_guess = f
                num = e
            if f > 0.85:
                nums.append(e)
        # Several classes can exceed the threshold, so more than one note can be
        # reported per onset.
        results = [trainingfiles[n] for n in nums]
        predicted_notes.append(results)
        print("Best guess:", " ".join(results))
    plot_pitches(sums, samplerate, onset_times, predicted_notes)


def plot_pitches(sums, samplerate, onset_times, predicted_notes):
    # create a visual representation of the onset times (debug function)
    print("plot onset_times")
    sums = np.asarray(sums)
    time = np.arange(len(sums)) / samplerate
    # draw a plot of the compressed audio samples
    plt.figure(1)
    plt.subplot(211)
    plt.plot(time, sums)
    # draw a vertical line at each detected onset and label it with the predicted notes
    for e, i in enumerate(onset_times):
        xpos = i / samplerate
        for u, j in enumerate(predicted_notes[e]):
            plt.text(xpos, 1.3 - ((u + 1) * 0.1), j)
        plt.plot([xpos, xpos], [0, 1], 'k-', lw=1)
    plt.show()


if __name__ == '__main__':
    if len(sys.argv) > 1:
        evaluate_model(sys.argv[1])
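
# Example usage (assumes a trained model at model/piano_model.h5; the input
# file name below is only illustrative):
#
#   python pitch_detection.py recording.wav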