-
Notifications
You must be signed in to change notification settings - Fork 0
/
mfcc.conf
196 lines (168 loc) · 6.05 KB
/
mfcc.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
///////////////////////////////////////////////////////////////////////////////////////
///////// > openSMILE configuration file to extract MFCC features < //////////////////
///////// HTK target kind: MFCC_0_D_A, numCeps=12 //////////////////
///////// //////////////////
///////// * written 2009 by Florian Eyben * //////////////////
///////// //////////////////
///////// (c) audEERING UG (haftungsbeschränkt), //////////////////
///////// All rights reserved. //////////////////
///////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////
;
; This section is always required in openSMILE configuration files
; it configures the componentManager and gives a list of all components which are to be loaded
; The order in which the components are listed should match
; the order of the data flow for most efficient processing
;
///////////////////////////////////////////////////////////////////////////////////////
[componentInstances:cComponentManager]
; Each instance[NAME].type line declares one component called NAME of the given
; type. Apart from dataMemory, every NAME listed here has a matching [NAME:type]
; section further down in this file. Keep the list in data-flow order.
instance[dataMemory].type=cDataMemory
; wave file input
instance[waveIn].type=cWaveSource
; audio framer
instance[frame].type=cFramer
; speech pre-emphasis (on a per frame basis as HTK does it)
instance[pe].type=cVectorPreemphasis
; apply a window function to pre-emphasised frames
instance[win].type=cWindower
; transform to the frequency domain using FFT
instance[fft].type=cTransformFFT
; compute magnitude of the complex fft from the previous component
instance[fftmag].type=cFFTmagphase
; compute Mel-bands from magnitude spectrum
instance[melspec].type=cMelspec
; compute MFCC from Mel-band spectrum
instance[mfcc].type=cMfcc
; compute delta coefficients from the MFCC (coefficients 0-12; no separate
; energy component is loaded in this configuration)
instance[delta].type=cDeltaRegression
; compute acceleration coefficients from the delta coefficients of the MFCC
instance[accel].type=cDeltaRegression
; write the result to an HTK parameter file
instance[htkout].type=cHtkSink
; CSV output for gnuplot scripts (this instance is currently enabled)
instance[csvSink].type=cCsvSink
; run single threaded (nThreads=1)
; NOTE: a single thread is more efficient for processing small files, since multi-threaded
; processing involves more overhead during startup, which makes the system slower overall
nThreads=1
; do not show any internal dataMemory level settings
; (if you want to see them set the value to 1, 2, 3, or 4, depending on the amount of detail you wish)
printLevelStats=0
/////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////// component configuration ////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////
; the following sections configure the components listed above
; a help on configuration parameters can be obtained with
; SMILExtract -H
; or
; SMILExtract -H configTypeName (= componentTypeName)
/////////////////////////////////////////////////////////////////////////////////////////////
[waveIn:cWaveSource]
; Output level: this component creates the "wave" level and is its only
; writer; no other component may write to a level with the same name.
writer.dmLevel = wave
; Input file: the \cm[...] macro defines the commandline option "-I" /
; "-inputfile", so the path need not be hard-coded in this file
; ({test.wav} is presumably the value used when the option is omitted).
filename = \cm[inputfile(I){test.wav}:name of input file]
; Stereo files are mixed down to mono for analysis.
monoMixdown = 1
[frame:cFramer]
; Framing stage: reads the "wave" level and writes the "frames" level.
reader.dmLevel = wave
writer.dmLevel = frames
noPostEOIprocessing = 1
copyInputName = 1
; NOTE(review): presumably seconds, i.e. 1.025 s frames every 0.5 s. That is
; far longer than the 25 ms / 10 ms framing commonly used for HTK-style
; MFCC extraction -- confirm these values are intentional.
frameSize = 1.0250
frameStep = 0.50
frameMode = fixed
frameCenterSpecial = left
[pe:cVectorPreemphasis]
; Per-frame pre-emphasis stage: reads "frames", writes "framespe".
reader.dmLevel = frames
writer.dmLevel = framespe
; k is presumably the pre-emphasis coefficient and de = 0 presumably selects
; pre-emphasis rather than de-emphasis -- confirm with
; SMILExtract -H cVectorPreemphasis
k = 0.97
de = 0
[win:cWindower]
; Windowing stage: reads the pre-emphasised frames ("framespe") and writes
; the windowed frames to "winframes".
reader.dmLevel = framespe
writer.dmLevel = winframes
copyInputName = 1
processArrayFields = 1
; Hamming window function
winFunc = ham
; unity gain and zero offset, i.e. no extra scaling or shift
gain = 1.0
offset = 0
[fft:cTransformFFT]
; FFT stage: reads the windowed frames ("winframes") and writes the "fft" level.
reader.dmLevel = winframes
writer.dmLevel = fft
copyInputName = 1
processArrayFields = 1
; inverse = 0: presumably selects the forward (time -> frequency) transform
inverse = 0
[fftmag:cFFTmagphase]
; Magnitude stage: reads the complex "fft" level and writes "fftmag".
reader.dmLevel = fft
writer.dmLevel = fftmag
copyInputName = 1
processArrayFields = 1
inverse = 0
; magnitude output switched on, phase output switched off
magnitude = 1
phase = 0
[melspec:cMelspec]
; Mel filterbank stage: reads the magnitude spectrum ("fftmag") and writes
; the Mel-band spectrum to "melspec".
reader.dmLevel = fftmag
writer.dmLevel = melspec
copyInputName = 1
processArrayFields = 1
; HTK-compatible sample value scaling
htkcompatible = 1
; number of Mel bands
nBands = 26
; operate on the power spectrum rather than the magnitude spectrum
usePower = 1
; band edges of the filterbank (presumably in Hz -- confirm with
; SMILExtract -H cMelspec)
lofreq = 0
hifreq = 8000
specScale = mel
inverse = 0
[mfcc:cMfcc]
; MFCC stage: reads the Mel-band spectrum ("melspec") and writes the
; cepstral coefficients to "ft0".
reader.dmLevel = melspec
writer.dmLevel = ft0
copyInputName = 1
processArrayFields = 1
; coefficients 0 through 12 are kept (the "_0" in the MFCC_0_D_A target kind
; named in the file header)
firstMfcc = 0
lastMfcc = 12
cepLifter = 22.0
htkcompatible = 1
[delta:cDeltaRegression]
; First-order regression: reads the MFCC level ("ft0") and writes the delta
; coefficients to "ft0de".
reader.dmLevel = ft0
writer.dmLevel = ft0de
; suffix for the output names (presumably appended to each copied input name)
nameAppend = de
copyInputName = 1
noPostEOIprocessing = 0
deltawin = 2
blocksize = 1
[accel:cDeltaRegression]
; Second-order regression: same settings as the delta stage, but applied to
; the delta level ("ft0de"); the result is written to "ft0dede".
reader.dmLevel = ft0de
writer.dmLevel = ft0dede
; suffix for the output names (presumably appended to each copied input name)
nameAppend = de
copyInputName = 1
noPostEOIprocessing = 0
deltawin = 2
blocksize = 1
//////////////////////////////////////////////////////////////////////
/////////////////// data output configuration //////////////////////
//////////////////////////////////////////////////////////////////////
; the HTK sink writes data in HTK parameter format
[htkout:cHtkSink]
; data from the following dataMemory levels is concatenated:
; MFCC (ft0), their deltas (ft0de) and their accelerations (ft0dede)
reader.dmLevel=ft0;ft0de;ft0dede
; this again defines a commandline option ("-O" or "-output") for the output file (see waveIn)
filename=\cm[output(O){mfcc.htk}:name of MFCC output filename (HTK format)]
append = 0
; HTK parameter kind code for MFCC_0_D_A:
; 6 (MFCC) + 256 (_D) + 512 (_A) + 8192 (_0) = 8966
parmKind = 8966
; csv sink for output of data to be visualised with gnuplot
[csvSink:cCsvSink]
; Reads only the plain MFCC level (ft0); the delta and acceleration levels
; are not written to the CSV file.
reader.dmLevel = ft0
; Defines the commandline option "-outputcsv" for the CSV file name. The help
; text describes the MFCC features this sink actually reads from ft0.
; NOTE(review): the default "?" presumably leaves CSV output disabled unless
; the option is given -- confirm with SMILExtract -H cCsvSink.
filename = \cm[outputcsv{?}:name of output CSV file for MFCC features, no deltas]
delimChar = ,
append = 0
timestamp = 0
number = 0
printHeader = 1
//////---------------------- END -------------------------///////