-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech-analysis.cpp
59 lines (44 loc) · 1.63 KB
/
speech-analysis.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

#include <fftw3.h>
using namespace std;
// Number of samples in each analysis frame passed to the FFT.
constexpr int WINDOW_SIZE = 512;
// Number of samples the analysis frame advances between consecutive frames.
constexpr int HOP_SIZE = 256;
/**
 * Computes the magnitude short-time Fourier transform (STFT) of a real signal.
 *
 * The signal is cut into frames of WINDOW_SIZE samples that start every
 * HOP_SIZE samples. Each frame is tapered with a sine window, transformed
 * with a real-to-complex FFT, and the magnitude of each of the
 * WINDOW_SIZE / 2 + 1 non-redundant bins is stored.
 *
 * Frames are emitted until every input sample is covered. A final frame that
 * runs past the end of the signal is tapered over the samples that exist and
 * zero-padded up to WINDOW_SIZE.
 *
 * @param audio      Input samples. Not modified.
 * @param sampleRate Unused by the transform; kept so existing callers compile.
 * @return Row-major magnitudes: element [i * (WINDOW_SIZE / 2 + 1) + j] is
 *         bin j of frame i. Empty when `audio` is empty.
 * @throws runtime_error if FFTW cannot allocate its buffers or build a plan.
 */
vector<double> computeSTFT(vector<double>& audio, int sampleRate) {
    (void)sampleRate;  // Intentionally unused; see the parameter note above.

    const size_t audioLength = audio.size();
    const size_t frameLength = static_cast<size_t>(WINDOW_SIZE);
    const size_t hop = static_cast<size_t>(HOP_SIZE);
    const size_t fftSize = frameLength / 2 + 1;

    // One frame for any non-empty signal, plus one more for every hop that is
    // needed to reach the last sample (rounded up so the tail is not dropped).
    size_t numWindows = 0;
    if (audioLength > frameLength) {
        numWindows = (audioLength - frameLength + hop - 1) / hop + 1;
    } else if (audioLength > 0) {
        numWindows = 1;
    }

    // Allocate the result before any FFTW resource so that a throwing
    // allocation here cannot leak FFTW buffers.
    vector<double> stft(fftSize * numWindows, 0.0);
    if (numWindows == 0) {
        return stft;
    }

    // The taper for a full frame is the same for every frame: compute it once.
    const double pi = acos(-1.0);
    vector<double> fullTaper(frameLength);
    for (size_t j = 0; j < frameLength; ++j) {
        fullTaper[j] = sin(pi * static_cast<double>(j) / static_cast<double>(frameLength - 1));
    }

    // A real-to-complex transform of n reals yields exactly n / 2 + 1 complex
    // bins, which is the size of the output buffer allocated here.
    double* fftIn = static_cast<double*>(fftw_malloc(sizeof(double) * frameLength));
    fftw_complex* fftOut = static_cast<fftw_complex*>(fftw_malloc(sizeof(fftw_complex) * fftSize));
    if (!fftIn || !fftOut) {
        if (fftIn) fftw_free(fftIn);
        if (fftOut) fftw_free(fftOut);
        throw runtime_error("computeSTFT: failed to allocate FFTW buffers");
    }

    // FFTW_MEASURE may overwrite both buffers while planning, so the plan is
    // created before any sample data is written into fftIn.
    fftw_plan fftPlan = fftw_plan_dft_r2c_1d(WINDOW_SIZE, fftIn, fftOut, FFTW_MEASURE);
    if (!fftPlan) {
        fftw_free(fftIn);
        fftw_free(fftOut);
        throw runtime_error("computeSTFT: failed to create FFTW plan");
    }

    for (size_t i = 0; i < numWindows; ++i) {
        const size_t startIndex = i * hop;
        const size_t remaining = audioLength - startIndex;
        const size_t samplesInFrame = remaining < frameLength ? remaining : frameLength;

        if (samplesInFrame == frameLength) {
            for (size_t j = 0; j < frameLength; ++j) {
                fftIn[j] = audio[startIndex + j] * fullTaper[j];
            }
        } else {
            // Short final frame: taper across the samples that exist. A single
            // sample gets weight 1 to avoid the 0 / 0 of a one-point window.
            for (size_t j = 0; j < samplesInFrame; ++j) {
                const double weight = samplesInFrame > 1
                    ? sin(pi * static_cast<double>(j) / static_cast<double>(samplesInFrame - 1))
                    : 1.0;
                fftIn[j] = audio[startIndex + j] * weight;
            }
            // Zero-pad so no data from an earlier frame leaks into this one.
            for (size_t j = samplesInFrame; j < frameLength; ++j) {
                fftIn[j] = 0.0;
            }
        }

        fftw_execute(fftPlan);

        for (size_t j = 0; j < fftSize; ++j) {
            const double re = fftOut[j][0];
            const double im = fftOut[j][1];
            stft[i * fftSize + j] = sqrt(re * re + im * im);
        }
    }

    fftw_destroy_plan(fftPlan);
    fftw_free(fftIn);
    fftw_free(fftOut);
    return stft;
}
int main() {
// Load audio from file or microphone input
// ...
// Compute STFT of audio signal
vector<double> stft = computeSTFT(audio, sampleRate);
// Extract features from STFT
// ...
return 0;
}