-
Notifications
You must be signed in to change notification settings - Fork 0
Low-latency pitch detection (phases 1-4) #29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
53a8ead
fcb587d
7022076
81d6e46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,40 +4,46 @@ | |
|
|
||
| void YinPitchDetector::prepare(double sampleRate) | ||
| { | ||
| decimation = (sampleRate > 50000.0) ? 4 : 2; | ||
| decimationCounter = 0; | ||
| decimationAccum = 0.0f; | ||
| analysisSR = sampleRate / decimation; | ||
| analysisSR = sampleRate; | ||
|
|
||
| windowSize = 2048; | ||
| halfWindow = windowSize / 2; | ||
| halfWindow = static_cast<int>(std::ceil(sampleRate / 70.0)); | ||
| windowSize = 2 * halfWindow; | ||
| hopSize = static_cast<int>(std::ceil(sampleRate * 0.003)); | ||
|
|
||
| fftOrder = static_cast<int>(std::ceil(std::log2(2.0 * windowSize))); | ||
| fftSize = 1 << fftOrder; | ||
| fft = std::make_unique<juce::dsp::FFT>(fftOrder); | ||
| fftInput.resize(static_cast<size_t>(fftSize * 2), 0.0f); | ||
| fftOutput.resize(static_cast<size_t>(fftSize * 2), 0.0f); | ||
|
|
||
| buffer.assign(static_cast<size_t>(windowSize), 0.0f); | ||
| linearBuffer.resize(static_cast<size_t>(windowSize)); | ||
| diff.resize(static_cast<size_t>(halfWindow)); | ||
| cmndf.resize(static_cast<size_t>(halfWindow)); | ||
|
|
||
| writePos = 0; | ||
| bufferFull = false; | ||
| hopCounter = 0; | ||
| windowFilled = false; | ||
| lastResult = {}; | ||
| } | ||
|
|
||
| void YinPitchDetector::feedSample(float sample) | ||
| { | ||
| decimationAccum += sample; | ||
| if (++decimationCounter < decimation) | ||
| return; | ||
|
|
||
| float decimatedSample = decimationAccum / static_cast<float>(decimation); | ||
| decimationAccum = 0.0f; | ||
| decimationCounter = 0; | ||
| buffer[static_cast<size_t>(writePos)] = sample; | ||
| writePos = (writePos + 1) % windowSize; | ||
| ++hopCounter; | ||
|
|
||
| buffer[static_cast<size_t>(writePos)] = decimatedSample; | ||
| ++writePos; | ||
| if (!windowFilled) | ||
| { | ||
| if (writePos == 0) | ||
| windowFilled = true; | ||
| else | ||
| return; | ||
| } | ||
|
|
||
| if (writePos >= windowSize) | ||
| if (hopCounter >= hopSize) | ||
| { | ||
| writePos = 0; | ||
| bufferFull = true; | ||
| hopCounter = 0; | ||
| analyse(); | ||
| } | ||
| } | ||
|
|
@@ -49,24 +55,47 @@ PitchResult YinPitchDetector::getResult() const | |
|
|
||
| void YinPitchDetector::analyse() | ||
| { | ||
| if (!bufferFull) | ||
| return; | ||
|
|
||
| auto n = static_cast<size_t>(halfWindow); | ||
|
|
||
| // Step 1: Difference function | ||
| for (size_t tau = 0; tau < n; ++tau) | ||
| for (int i = 0; i < windowSize; ++i) | ||
| linearBuffer[static_cast<size_t>(i)] = buffer[static_cast<size_t>((writePos + i) % windowSize)]; | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ring buffer linearization copies the full window every hop -- consider whether this matters for Phase 2. Right now this copies 1024 floats every 128 samples, which is fine. But worth noting: when Phase 2 adds FFT-accelerated autocorrelation, you'll need to copy data into the FFT buffer anyway. At that point this `linearBuffer` copy can probably be folded into the FFT-buffer fill. Not a blocker for this phase, just flagging so Phase 2 doesn't end up doing two copies. |
||
|
|
||
| std::fill(fftInput.begin(), fftInput.end(), 0.0f); | ||
| for (size_t i = 0; i < n; ++i) | ||
| fftInput[i] = linearBuffer[i]; | ||
| fft->performRealOnlyForwardTransform(fftInput.data()); | ||
|
|
||
| std::fill(fftOutput.begin(), fftOutput.end(), 0.0f); | ||
| for (int i = 0; i < windowSize; ++i) | ||
| fftOutput[static_cast<size_t>(i)] = linearBuffer[static_cast<size_t>(i)]; | ||
| fft->performRealOnlyForwardTransform(fftOutput.data()); | ||
|
|
||
| for (int k = 0; k < fftSize; ++k) | ||
| { | ||
| float sum = 0.0f; | ||
| for (size_t j = 0; j < n; ++j) | ||
| { | ||
| float d = buffer[j] - buffer[j + tau]; | ||
| sum += d * d; | ||
| } | ||
| diff[tau] = sum; | ||
| float aRe = fftInput[static_cast<size_t>(2 * k)]; | ||
| float aIm = fftInput[static_cast<size_t>(2 * k + 1)]; | ||
| float bRe = fftOutput[static_cast<size_t>(2 * k)]; | ||
| float bIm = fftOutput[static_cast<size_t>(2 * k + 1)]; | ||
| fftInput[static_cast<size_t>(2 * k)] = aRe * bRe + aIm * bIm; | ||
| fftInput[static_cast<size_t>(2 * k + 1)] = aRe * bIm - aIm * bRe; | ||
| } | ||
|
|
||
| fft->performRealOnlyInverseTransform(fftInput.data()); | ||
|
|
||
| float powerTerm0 = 0.0f; | ||
| for (size_t j = 0; j < n; ++j) | ||
| powerTerm0 += linearBuffer[j] * linearBuffer[j]; | ||
|
|
||
| float powerTermTau = powerTerm0; | ||
|
|
||
| diff[0] = 0.0f; | ||
| for (size_t tau = 1; tau < n; ++tau) | ||
| { | ||
| powerTermTau += linearBuffer[n + tau - 1] * linearBuffer[n + tau - 1] | ||
| - linearBuffer[tau - 1] * linearBuffer[tau - 1]; | ||
| diff[tau] = powerTerm0 + powerTermTau - 2.0f * fftInput[tau]; | ||
| } | ||
|
|
||
| // Step 2: Cumulative mean normalized difference function | ||
| cmndf[0] = 1.0f; | ||
| float runningSum = 0.0f; | ||
|
|
||
|
|
@@ -79,7 +108,6 @@ void YinPitchDetector::analyse() | |
| cmndf[tau] = 1.0f; | ||
| } | ||
|
|
||
| // Step 3: Absolute threshold | ||
| size_t tauEstimate = 0; | ||
| for (size_t tau = 2; tau < n; ++tau) | ||
| { | ||
|
|
@@ -98,7 +126,6 @@ void YinPitchDetector::analyse() | |
| return; | ||
| } | ||
|
|
||
| // Step 4: Parabolic interpolation | ||
| float betterTau = static_cast<float>(tauEstimate); | ||
|
|
||
| if (tauEstimate > 0 && tauEstimate < n - 1) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hardcoded `windowSize = 1024` ignores sample rate -- E2 detection will fail at 96kHz.
At 44.1kHz, 1024 samples = ~23ms, which covers ~1.9 periods of E2 (82.4Hz). That's tight but workable with parabolic interpolation.
At 96kHz, 1024 samples = ~10.7ms, which covers only ~0.88 periods of E2. The YIN algorithm needs at least 2 periods in the analysis window (`halfWindow` = 512 samples = ~5.3ms at 96kHz = 0.44 periods). This is fundamentally insufficient -- YIN cannot find a valid tau for E2 because the period (~1164 samples at 96kHz) exceeds `halfWindow`.
The old code handled this via decimation (effectively analyzing at 24kHz at 96k SR). Since decimation is removed, the window size should scale with sample rate to maintain the same frequency coverage:
This gives 1024 at 44.1k, 1115 at 48k, 2227 at 96k -- preserving the ~23ms analysis window regardless of sample rate.
The `hopSize` should probably scale proportionally too, to maintain the same ~3ms update interval.