Skip to content

Commit 40c5902

Browse files
authored
Merge pull request #76 from erikma/dev/erikma/autoNfft
Fix "WARNING:root:frame length (X) is greater than FFT size"
2 parents 044d307 + 9ab3287 commit 40c5902

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

python_speech_features/base.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,35 @@
55
from python_speech_features import sigproc
66
from scipy.fftpack import dct
77

8+
def calculate_nfft(samplerate, winlen):
    """Pick an FFT size that covers one full analysis window.

    The result is the smallest power of two that is greater than or equal
    to the number of samples in a window (``winlen * samplerate``).

    An FFT shorter than the window would discard samples and lose
    precision; an FFT longer than the window just zero-pads the buffer,
    which is neutral for the frequency-domain conversion.

    :param samplerate: The sample rate of the signal we are working with, in Hz.
    :param winlen: The length of the analysis window in seconds.
    :returns: The FFT size as an int power of two (1 when the window holds
        one sample or fewer).
    """
    samples_per_window = winlen * samplerate
    fft_size = 1
    # Double until the FFT buffer can hold every sample of the window.
    while fft_size < samples_per_window:
        fft_size <<= 1
    return fft_size
24+
825
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
9-
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
26+
nfilt=26,nfft=None,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
1027
winfunc=lambda x:numpy.ones((x,))):
1128
"""Compute MFCC features from an audio signal.
1229
1330
:param signal: the audio signal from which to compute features. Should be an N*1 array
14-
:param samplerate: the samplerate of the signal we are working with.
31+
:param samplerate: the sample rate of the signal we are working with, in Hz.
1532
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
1633
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
1734
:param numcep: the number of cepstrum to return, default 13
1835
:param nfilt: the number of filters in the filterbank, default 26.
19-
:param nfft: the FFT size. Default is 512.
36+
:param nfft: the FFT size. Default is None, which uses the calculate_nfft function to choose the smallest size that does not drop sample data.
2037
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
2138
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
2239
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
@@ -25,6 +42,7 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
2542
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
2643
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
2744
"""
45+
nfft = nfft or calculate_nfft(samplerate, winlen)
2846
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
2947
feat = numpy.log(feat)
3048
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
@@ -38,7 +56,7 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
3856
"""Compute Mel-filterbank energy features from an audio signal.
3957
4058
:param signal: the audio signal from which to compute features. Should be an N*1 array
41-
:param samplerate: the samplerate of the signal we are working with.
59+
:param samplerate: the sample rate of the signal we are working with, in Hz.
4260
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
4361
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
4462
:param nfilt: the number of filters in the filterbank, default 26.
@@ -69,7 +87,7 @@ def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
6987
"""Compute log Mel-filterbank energy features from an audio signal.
7088
7189
:param signal: the audio signal from which to compute features. Should be an N*1 array
72-
:param samplerate: the samplerate of the signal we are working with.
90+
:param samplerate: the sample rate of the signal we are working with, in Hz.
7391
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
7492
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
7593
:param nfilt: the number of filters in the filterbank, default 26.
@@ -89,7 +107,7 @@ def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
89107
"""Compute Spectral Subband Centroid features from an audio signal.
90108
91109
:param signal: the audio signal from which to compute features. Should be an N*1 array
92-
:param samplerate: the samplerate of the signal we are working with.
110+
:param samplerate: the sample rate of the signal we are working with, in Hz.
93111
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
94112
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
95113
:param nfilt: the number of filters in the filterbank, default 26.
@@ -134,7 +152,7 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
134152
135153
:param nfilt: the number of filters in the filterbank, default 20.
136154
:param nfft: the FFT size. Default is 512.
137-
:param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
155+
:param samplerate: the sample rate of the signal we are working with, in Hz. Affects mel spacing.
138156
:param lowfreq: lowest band edge of mel filters, default 0 Hz
139157
:param highfreq: highest band edge of mel filters, default samplerate/2
140158
:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.

0 commit comments

Comments
 (0)