-
Notifications
You must be signed in to change notification settings - Fork 13
/
vad.go
49 lines (38 loc) · 989 Bytes
/
vad.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package alexa
import (
"math"
"github.com/mjibson/go-dsp/fft"
)
// Voice Activity Detection (that's what they call it when you detect
// that someone has started (or stopped) talking).
type VAD struct {
samples []complex128
fft []complex128
spectrum []float64
lastSpectrum []float64
}
func NewVAD(width int) *VAD {
return &VAD{
samples: make([]complex128, width),
spectrum: make([]float64, width/2+1),
lastSpectrum: make([]float64, width/2+1),
}
}
// Given the samples, return the spectral flux value as compared
// to the previous samples.
func (v *VAD) Flux(samples []int16) float64 {
for i, s := range samples {
v.samples[i] = complex(float64(s), 0)
}
v.fft = fft.FFT(v.samples)
copy(v.spectrum, v.lastSpectrum)
for i, _ := range v.spectrum {
c := v.fft[i]
v.spectrum[i] = math.Sqrt(real(c)*real(c) + imag(c)*imag(c))
}
var flux float64
for i, s := range v.spectrum {
flux += (s - v.lastSpectrum[i])
}
return flux
}