from pydub import AudioSegment
import matplotlib.pyplot as plt
from scipy.io import wavfile
from tempfile import mktemp
import librosa
import numpy as np
import imagehash
from PIL import Image
import DB_helpers
import logging
# ==============================================================================================
# config of the logger
logging.basicConfig(filename="Shazam.log",
                    format='%(asctime)s %(message)s',
                    filemode='w')
logger = logging.getLogger()  # logger maintainer
logger.setLevel(logging.DEBUG)
# ==============================================================================================
class Song:
    """
    A class to represent a song.

    ...

    Attributes
    ----------
    path (essential) : str
        path of the audio file
    audioData (optional) : list
        the sampled data of the audio signal in the time domain
    samplingFreq (optional) : int
        the sampling frequency of the audio signal
    melSpectrogramFeatureImage (optional) : PIL image
        the mel-spectrogram feature represented as a PIL image
    mfccFeatureImage (optional) : PIL image
        the mfcc feature represented as a PIL image

    Methods
    -------
    readAudioFile():
        reads an mp3 audio file.
    generateAndSaveSpectrogram():
        generates and saves the spectrogram of a signal.
    generateFeatures():
        generates the spectral features of a signal.
    saveFeatures():
        saves the spectral features of a signal as PNG images.
    generateFingerprint():
        generates the fingerprint of a song as hashed mfcc & mel-spectrogram features.
    """
    def __init__(self, path=''):
        """
        Constructs all the necessary attributes for the song object.

        Parameters
        ----------
        path : str
            path of the audio file
        """
        self.path = path
        if self.path:
            self.samplingFreq, self.audioData = self.readAudioFile()
            logger.debug(self.path + " : audio has been read successfully")
# ==============================================================================================
    def readAudioFile(self):
        """
        readAudioFile reads an mp3 audio file.

        :return: sampling frequency & data of the signal
        """
        mp3_audio = AudioSegment.from_file(
            self.path, format="mp3")  # read the mp3 file
        mp3_audio = mp3_audio[:60000]  # take only the first 60 sec of the audio
        wname = mktemp('.wav')  # use a temporary file
        # export as a mono ("-ac", "1") wav file instead of stereo
        mp3_audio.export(wname, format="wav", parameters=["-ac", "1"])
        samplingFreq, audioData = wavfile.read(wname)  # read the wav file
        return samplingFreq, audioData
# ==============================================================================================
    def generateAndSaveSpectrogram(self):
        """
        generateAndSaveSpectrogram generates and saves the spectrogram of a signal.
        """
        fig = plt.figure()  # create a figure
        plt.specgram(self.audioData, Fs=self.samplingFreq,
                     NFFT=128, noverlap=0)  # plot the spectrogram
        spectrogramFilePath = self.path[:-4]  # remove '.mp3' from the path
        fig.savefig(spectrogramFilePath)  # save the spectrogram
        plt.close(fig)
        logger.debug(
            self.path + " : spectrogram has been saved successfully")
# ==============================================================================================
    def generateFeatures(self):
        """
        generateFeatures generates the spectral features of a signal.

        :return: mfcc & mel-spectrogram features as images
        """
        # generate the mel-spectrogram spectral feature
        melSpectrogram = librosa.feature.melspectrogram(
            y=self.audioData.astype('float64'), sr=self.samplingFreq)
        # generate the mfcc spectral feature
        mfcc = librosa.feature.mfcc(
            y=self.audioData.astype('float64'), sr=self.samplingFreq)
        # convert each feature array to a PIL image so it can be perceptually hashed
        self.melSpectrogramFeatureImage = Image.fromarray(
            melSpectrogram, mode='RGB')
        self.mfccFeatureImage = Image.fromarray(mfcc, mode='RGB')
# ==============================================================================================
    def saveFeatures(self):
        """
        saveFeatures saves the spectral features of a signal as PNG images.
        """
        # remove '.mp3' from the path and append the corresponding feature name
        melSpectrogramPath = self.path[:-4] + '_melSpectrogram.png'
        mfccPath = self.path[:-4] + '_mfcc.png'
        self.melSpectrogramFeatureImage.save(melSpectrogramPath)
        self.mfccFeatureImage.save(mfccPath)
        logger.debug(
            self.path + " : features have been saved successfully")
# ==============================================================================================
    def generateFingerprint(self):
        """
        generateFingerprint generates the fingerprint of a song as hashed mfcc & mel-spectrogram features.

        :return: a dictionary that contains the hashed features as: key => name of the feature, value => hash of the feature
        """
        SongHashes = {}
        self.generateFeatures()
        SongHashes['melSpectrogramHash'] = generatePerceptualHash(
            self.melSpectrogramFeatureImage)
        SongHashes['mfccHash'] = generatePerceptualHash(self.mfccFeatureImage)
        logger.debug(
            self.path + " : fingerprint has been generated successfully")
        return SongHashes
# ==============================================================================================
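# Example usage of the Song pipeline (an illustrative sketch; 'songs/sample.mp3'
# is a hypothetical path, not part of this module):
#
#     song = Song('songs/sample.mp3')        # reads the first 60 seconds of the mp3
#     hashes = song.generateFingerprint()    # {'melSpectrogramHash': '...', 'mfccHash': '...'}
#     song.saveFeatures()                    # optional: persist the feature images as PNGs
# ==============================================================================================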
def generatePerceptualHash(feature):
    """
    generatePerceptualHash computes the perceptual hash of a signal feature represented by an image.

    :param feature: a PIL image that represents a signal feature
    :return: the hashed feature as a string
    """
    featureHash = imagehash.phash(feature, hash_size=16)  # generate the perceptual hash
    return str(featureHash)
# ==============================================================================================
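# Illustrative sketch of generatePerceptualHash (the synthetic image below is a made-up
# stand-in for a real feature image; phash converts its input to grayscale internally):
#
#     img = Image.fromarray(np.random.randint(0, 255, (128, 128), dtype=np.uint8))
#     print(generatePerceptualHash(img))   # 64 hex characters: a 16x16 = 256-bit hash
# ==============================================================================================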
def getHammingDistance(hashOne, hashTwo):
    """
    getHammingDistance calculates the Hamming distance between two hash strings, i.e. the number of bits in which they differ.

    :param hashOne: the first hash represented as a string
    :param hashTwo: the second hash represented as a string
    :return: the Hamming distance as an integer
    """
    # convert each string back to an ImageHash object; subtracting one from the other yields the Hamming distance
    return imagehash.hex_to_hash(hashOne) - imagehash.hex_to_hash(hashTwo)
# ==============================================================================================
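# Illustrative sketch of getHammingDistance: with hash_size=16 the distance ranges
# from 0 (identical images) to 256 (every bit differs); imageA and imageB below are
# hypothetical PIL feature images:
#
#     h1 = generatePerceptualHash(imageA)
#     h2 = generatePerceptualHash(imageB)
#     distance = getHammingDistance(h1, h2)   # integer in [0, 256]
# ==============================================================================================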
def mapValue(inputValue: float, inputMin: float, inputMax: float, outputMin: float, outputMax: float):
    """
    mapValue linearly maps a value from one range to another.

    :param inputValue: the value to be mapped, lying in the range inputMin..inputMax
    :param inputMin: minimum of the input range
    :param inputMax: maximum of the input range
    :param outputMin: minimum of the output range
    :param outputMax: maximum of the output range
    :return: the mapped value within the range outputMin..outputMax
    """
    slope = (outputMax - outputMin) / (inputMax - inputMin)
    return outputMin + slope * (inputValue - inputMin)
# ==============================================================================================
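# Worked example of mapValue: mapping an average Hamming distance of 64 from the range
# [0, 256] onto [0, 1] gives slope = 1/256, so mapValue(64, 0, 256, 0, 1) == 0.25,
# which compareFingerprint below turns into a similarity index of int((1 - 0.25) * 100) = 75.
# ==============================================================================================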
def compareFingerprint(userSongHashes):
    """
    compareFingerprint compares the hashed fingerprint of a user-given song against the hashed fingerprints of all database songs.

    :param userSongHashes: a dictionary that contains the hashes of the mfcc & mel-spectrogram features
    :return: a list of (song name, similarity index) tuples sorted in descending order of similarity
    """
    # initialization
    similarityResults = {}
    songComponents = ['Full', 'Music', 'Vocals']
    # fetch the hashes from the database
    databaseSongsHash = DB_helpers.readFingerprintDatabase()
    logger.debug(
        "all the hashes in the database have been retrieved successfully")
    # iterate over the hashes to get a result against each one
    for songHash in databaseSongsHash:
        # generate the song name from the ID fetched from the database
        songName = 'Group' + songHash['ID'][0:2] + '_Song' + songHash['ID'][2]
        maxSimilarityIndex = 0
        # iterate over each song component (full, music, vocals)
        for songComponent in songComponents:
            # compute the Hamming distance of each feature
            melSpectrogramHammingDistance = getHammingDistance(
                songHash['mel-spectrogram'][songComponent], userSongHashes['melSpectrogramHash'])
            mfccHammingDistance = getHammingDistance(
                songHash['mfcc'][songComponent], userSongHashes['mfccHash'])
            # average the differences of the two features
            avgDifference = (melSpectrogramHammingDistance +
                             mfccHammingDistance) / 2
            # map the value to the range 0 => 1 instead of 0 => 256, so we can represent it better
            mappedAvgDifference = mapValue(avgDifference, 0, 256, 0, 1)
            # calculate the similarity index as a percentage
            similarityIndex = int((1 - mappedAvgDifference) * 100)
            # keep only the best-scoring component for each song
            if similarityIndex > maxSimilarityIndex:
                similarityResults.update({songName: similarityIndex})
                maxSimilarityIndex = similarityIndex
        logger.debug(
            "similarity index for " + songName + " has been generated successfully")
    # sort the results in descending order
    similarityResults = sorted(
        similarityResults.items(), key=lambda x: x[1], reverse=True)
    return similarityResults
# ==============================================================================================
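# Illustrative sketch of the full matching flow (assumes the fingerprint database read by
# DB_helpers.readFingerprintDatabase() is already populated; 'query.mp3' is a hypothetical path):
#
#     userSong = Song('query.mp3')
#     results = compareFingerprint(userSong.generateFingerprint())
#     # results is a list of (songName, similarityIndex) tuples, best match first,
#     # e.g. [('Group01_Song1', 92), ('Group07_Song2', 61), ...]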