extract_information.py
"""
This file provides function to extract chords or melody from the given file.
This is basically a wrapper to other libraries.
Return data in JAMs format, which is different for chords and melody.
"""
import librosa
import vamp
import numpy as np
from crema import analyze as crema_analyze


def extract_chords_chordino(audio_path):
    audio_1, sr_1 = librosa.load(audio_path, sr=44100, mono=True)
    duration = librosa.get_duration(y=audio_1, sr=sr_1)
    chords = vamp.collect(audio_1, sr_1, "nnls-chroma:chordino")
    # Timestamps are of type RealTime. Convert them to float first.
    chords_casted = {
        'list': [{'timestamp': float(c['timestamp']), 'label': c['label']} for c in chords['list']]
    }
    # Use the JAMS format
    jams_format = {
        "annotations": [
            {
                "annotation_metadata": {
                    "annotator": {},
                    "annotation_tools": "nnls-chroma:chordino",
                    "version": "",
                    "annotation_rules": "",
                    "data_source": "program",
                    "corpus": "",
                    "validation": "",
                    "curator": {
                        "name": "",
                        "email": ""
                    }
                },
                "file_metadata": {
                    "release": "",
                    "artist": "",
                    "jams_version": "0.3.4",
                    "title": "",
                    "duration": duration,
                    "identifiers": {}
                },
                "sandbox": {},
                "data": [{"time": c["timestamp"],
                          "confidence": None,  # not provided by Chordino
                          "duration": None,  # not provided by Chordino
                          "value": c["label"]
                          } for c in chords_casted["list"]]
            }
        ]
    }
    return jams_format
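

# Chordino reports only chord onsets, which is why "duration" above is left as
# None. A minimal sketch of how durations could be derived from consecutive
# onsets if needed downstream; `chord_durations` is a hypothetical helper, not
# part of this module's original interface.
def chord_durations(data, total_duration):
    # Each chord lasts until the next onset; the final chord lasts until the
    # end of the track.
    durations = []
    for i, obs in enumerate(data):
        end = data[i + 1]["time"] if i + 1 < len(data) else total_duration
        durations.append(end - obs["time"])
    return durations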


def extract_chords_crema(audio_path):
    return crema_analyze.analyze(audio_path)


def extract_melody_melodia(audio_path):
    voicing = 0.6
    # Comments in this function are adapted from the creator of Melodia.
    # This is how we load audio using librosa:
    audio_1, sr_1 = librosa.load(audio_path, sr=44100, mono=True)
    # Melodia has 4 parameters:
    # * minfqr: minimum frequency in Hertz (default 55.0)
    # * maxfqr: maximum frequency in Hertz (default 1760.0)
    # * voicing: voicing tolerance. Greater values will result in more pitch contours
    #   included in the final melody; smaller values will result in fewer pitch
    #   contours included in the final melody (default 0.2).
    # * minpeaksalience: (in Sonic Visualiser, "Monophonic Noise Filter") a hack to
    #   avoid silence turning into junk contours when analyzing monophonic recordings
    #   (e.g. solo voice with no accompaniment). Generally you want to leave this
    #   untouched (default 0.0).
    # Parameter values are specified by passing a dictionary as the optional
    # "parameters" argument:
    params = {"minfqr": 100.0, "maxfqr": 1760.0, "voicing": voicing, "minpeaksalience": 0.0}
    data_1 = vamp.collect(audio_1, sr_1, "mtg-melodia:melodia", parameters=params)
    # 'vector' is a tuple of two values: the hop size used for analysis and the
    # array of pitch values. Note that the hop size is *always* 128/44100.0 = 2.9 ms.
    hop_1, melody_1 = data_1['vector']
    # *** SUPER IMPORTANT ***
    # For reasons internal to the vamp architecture, the timestamp of the FIRST
    # value in the melody array is always:
    #     first_timestamp = 8 * hop = 8 * 128/44100.0 = 0.023219954648526078
    # This means that the timestamp of the pitch value at index i (starting with
    # i=0) is given by:
    #     timestamp[i] = 8 * 128/44100.0 + i * (128/44100.0)
    # So, to generate a timestamp array matching the pitch values:
    timestamps_1 = 8 * 128 / 44100.0 + np.arange(len(melody_1)) * (128 / 44100.0)
    melody = melody_1.tolist()
    output = {
        'data': [
            {
                'value': melody,
                'time': timestamps_1.tolist()
            }
        ]
    }
    return output
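

# Melodia reports unvoiced frames as negative frequencies (its pitch estimate
# with the sign flipped) and 0.0 where no pitch was found. A minimal sketch of
# one way to keep only voiced frames; `voiced_only` is a hypothetical helper,
# not used elsewhere in this module.
def voiced_only(melody, timestamps):
    # Keep (time, frequency) pairs only where the frame is voiced (f > 0)
    pairs = [(t, f) for t, f in zip(timestamps, melody) if f > 0]
    times = [t for t, _ in pairs]
    freqs = [f for _, f in pairs]
    return times, freqs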


def extract_melody_piptrack(audio_path):
    y, sr = librosa.load(audio_path, sr=44100, mono=True)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    strongest_pitches = []
    # For each frame, keep the pitch of the bin with the strongest magnitude
    for t in range(magnitudes.shape[1]):
        index = magnitudes[:, t].argmax()
        pitch = pitches[index, t]
        strongest_pitches.append(float(pitch))
    # piptrack uses librosa's default hop length of 512 samples (not Melodia's
    # 128-sample hop), so convert frame indices to seconds accordingly
    timestamps_1 = librosa.frames_to_time(np.arange(len(strongest_pitches)), sr=sr, hop_length=512)
    output = {
        'data': [
            {
                'value': strongest_pitches,
                'time': timestamps_1.tolist()
            }
        ]
    }
    return output
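

# A minimal usage sketch, assuming a local file "song.wav" (a hypothetical
# path) and that the vamp plugins (nnls-chroma, mtg-melodia) and the crema
# models are installed. Not part of the module's original interface.
if __name__ == "__main__":
    path = "song.wav"
    chords = extract_chords_chordino(path)
    print("chordino chord events:", len(chords["annotations"][0]["data"]))
    melody = extract_melody_melodia(path)
    print("melodia pitch values:", len(melody["data"][0]["value"]))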