5
5
from python_speech_features import sigproc
6
6
from scipy .fftpack import dct
7
7
8
def calculate_nfft(samplerate, winlen):
    """Pick the FFT size for a given analysis window.

    The result is the smallest power of two that is not less than the number
    of samples spanned by one analysis window.

    An FFT shorter than the window would discard samples and lose precision,
    whereas an FFT longer than the window merely zero-pads the buffer, which
    is neutral for the frequency-domain conversion.

    :param samplerate: the sample rate of the signal we are working with, in Hz.
    :param winlen: the length of the analysis window in seconds.
    :returns: the FFT size, a power of two (1 when the window spans at most one sample).
    """
    samples_per_window = winlen * samplerate
    fft_size = 1
    # Double (shift left by one bit) until the whole window fits in the FFT.
    while fft_size < samples_per_window:
        fft_size <<= 1
    return fft_size
24
+
8
25
def mfcc (signal ,samplerate = 16000 ,winlen = 0.025 ,winstep = 0.01 ,numcep = 13 ,
9
- nfilt = 26 ,nfft = 512 ,lowfreq = 0 ,highfreq = None ,preemph = 0.97 ,ceplifter = 22 ,appendEnergy = True ,
26
+ nfilt = 26 ,nfft = None ,lowfreq = 0 ,highfreq = None ,preemph = 0.97 ,ceplifter = 22 ,appendEnergy = True ,
10
27
winfunc = lambda x :numpy .ones ((x ,))):
11
28
"""Compute MFCC features from an audio signal.
12
29
13
30
:param signal: the audio signal from which to compute features. Should be an N*1 array
14
- :param samplerate: the samplerate of the signal we are working with.
31
+ :param samplerate: the sample rate of the signal we are working with, in Hz .
15
32
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
16
33
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
17
34
:param numcep: the number of cepstrum to return, default 13
18
35
:param nfilt: the number of filters in the filterbank, default 26.
19
- :param nfft: the FFT size. Default is 512 .
36
+ :param nfft: the FFT size. Default is None, which uses the calculate_nfft function to choose the smallest size that does not drop sample data .
20
37
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
21
38
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
22
39
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
@@ -25,6 +42,7 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
25
42
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
26
43
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
27
44
"""
45
+ nfft = nfft or calculate_nfft (samplerate , winlen )
28
46
feat ,energy = fbank (signal ,samplerate ,winlen ,winstep ,nfilt ,nfft ,lowfreq ,highfreq ,preemph ,winfunc )
29
47
feat = numpy .log (feat )
30
48
feat = dct (feat , type = 2 , axis = 1 , norm = 'ortho' )[:,:numcep ]
@@ -38,7 +56,7 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
38
56
"""Compute Mel-filterbank energy features from an audio signal.
39
57
40
58
:param signal: the audio signal from which to compute features. Should be an N*1 array
41
- :param samplerate: the samplerate of the signal we are working with.
59
+ :param samplerate: the sample rate of the signal we are working with, in Hz .
42
60
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
43
61
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
44
62
:param nfilt: the number of filters in the filterbank, default 26.
@@ -69,7 +87,7 @@ def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
69
87
"""Compute log Mel-filterbank energy features from an audio signal.
70
88
71
89
:param signal: the audio signal from which to compute features. Should be an N*1 array
72
- :param samplerate: the samplerate of the signal we are working with.
90
+ :param samplerate: the sample rate of the signal we are working with, in Hz .
73
91
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
74
92
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
75
93
:param nfilt: the number of filters in the filterbank, default 26.
@@ -89,7 +107,7 @@ def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
89
107
"""Compute Spectral Subband Centroid features from an audio signal.
90
108
91
109
:param signal: the audio signal from which to compute features. Should be an N*1 array
92
- :param samplerate: the samplerate of the signal we are working with.
110
+ :param samplerate: the sample rate of the signal we are working with, in Hz .
93
111
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
94
112
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
95
113
:param nfilt: the number of filters in the filterbank, default 26.
@@ -134,7 +152,7 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
134
152
135
153
:param nfilt: the number of filters in the filterbank, default 20.
136
154
:param nfft: the FFT size. Default is 512.
137
- :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
155
+ :param samplerate: the sample rate of the signal we are working with, in Hz . Affects mel spacing.
138
156
:param lowfreq: lowest band edge of mel filters, default 0 Hz
139
157
:param highfreq: highest band edge of mel filters, default samplerate/2
140
158
:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
0 commit comments