Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plp #55

Merged
merged 5 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ set(CC_SOURCES
${SOURCE_DIR}/analysis/mel_filter_bank_analysis.cc
${SOURCE_DIR}/analysis/mel_frequency_cepstral_coefficients_analysis.cc
${SOURCE_DIR}/analysis/mel_generalized_cepstral_analysis.cc
${SOURCE_DIR}/analysis/perceptual_linear_predictive_coefficients_analysis.cc
${SOURCE_DIR}/analysis/pitch_extraction.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_dio.cc
${SOURCE_DIR}/analysis/pitch_extraction_by_harvest.cc
Expand Down Expand Up @@ -177,7 +178,9 @@ set(CC_SOURCES
${SOURCE_DIR}/math/gaussian_mixture_modeling.cc
${SOURCE_DIR}/math/histogram_calculation.cc
${SOURCE_DIR}/math/inverse_discrete_cosine_transform.cc
${SOURCE_DIR}/math/inverse_discrete_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fast_fourier_transform.cc
${SOURCE_DIR}/math/inverse_fourier_transform.cc
${SOURCE_DIR}/math/levinson_durbin_recursion.cc
${SOURCE_DIR}/math/matrix.cc
${SOURCE_DIR}/math/matrix2d.cc
Expand Down Expand Up @@ -352,6 +355,7 @@ set(MAIN_SOURCES
${SOURCE_DIR}/main/pca.cc
${SOURCE_DIR}/main/pcas.cc
${SOURCE_DIR}/main/phase.cc
${SOURCE_DIR}/main/plp.cc
${SOURCE_DIR}/main/pitch.cc
${SOURCE_DIR}/main/pitch2sin.cc
${SOURCE_DIR}/main/pitch_mark.cc
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Changes from SPTK3
- Nonrecursive MLPG (`mlpg -R 1`)
- Pitch adaptive spectrum estimation (`pitch_spec`)
- Pitch extraction by DIO used in WORLD (`pitch -a 3`)
- PLP extraction (`plp`)
- Pole-zero plot (`gpolezero`)
- Scalar quantization (`quantize` and `dequantize`)
- Sinusoidal generation from pitch (`pitch2sin`)
Expand Down
2 changes: 1 addition & 1 deletion doc/main/mfcc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mfcc

.. doxygenfile:: mfcc.cc

.. seealso:: :ref:`fbank`
.. seealso:: :ref:`fbank` :ref:`plp`

.. doxygenclass:: sptk::MelFrequencyCepstralCoefficientsAnalysis
:members:
11 changes: 11 additions & 0 deletions doc/main/plp.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _plp:

plp
===

.. doxygenfile:: plp.cc

.. seealso:: :ref:`fbank` :ref:`mfcc`

.. doxygenclass:: sptk::PerceptualLinearPredictiveCoefficientsAnalysis
:members:
6 changes: 6 additions & 0 deletions include/SPTK/analysis/mel_filter_bank_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ class MelFilterBankAnalysis {
return is_valid_;
}

/**
* @param[out] center_frequencies Center frequencies in Hz.
* @return Boolean status (presumably true on success, matching the other
*         bool-returning methods of this library -- confirm in the .cc file).
*/
bool GetCenterFrequencies(std::vector<double>* center_frequencies) const;

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] filter_bank_output @f$C@f$-channel filter-bank outputs.
Expand All @@ -123,6 +128,7 @@ class MelFilterBankAnalysis {

int lower_bin_index_;
int upper_bin_index_;
std::vector<double> center_frequencies_;
std::vector<int> channel_indices_;
std::vector<double> channel_weights_;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
#define SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_

#include <vector> // std::vector

#include "SPTK/analysis/mel_filter_bank_analysis.h"
#include "SPTK/conversion/linear_predictive_coefficients_to_cepstrum.h"
#include "SPTK/math/inverse_fourier_transform.h"
#include "SPTK/math/levinson_durbin_recursion.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Perform perceptual linear predictive (PLP) coefficients analysis.
*
* The input is the half part of power spectrum:
* @f[
* \begin{array}{cccc}
* |X(0)|^2, & |X(1)|^2, & \ldots, & |X(N/2)|^2,
* \end{array}
* @f]
* where @f$N@f$ is the FFT length. The outputs are the @f$M@f$-th order PLP
* features with the zeroth cepstral parameter:
* @f[
* \begin{array}{ccccc}
* c(0), & \bar{c}(1), & \bar{c}(2), & \ldots, & \bar{c}(M)
* \end{array}
* @f]
* and the log-signal energy @f$E@f$.
*
* [1] S. Young et al., "The HTK book," Cambridge University
* Engineering Department, 2006.
*/
class PerceptualLinearPredictiveCoefficientsAnalysis {
public:
/**
* Buffer for PerceptualLinearPredictiveCoefficientsAnalysis class.
*
* An instance is handed to Run() as working storage. All members are private
* and are reachable only from the enclosing analysis class through the friend
* declaration below.
*/
class Buffer {
public:
Buffer() {
}

virtual ~Buffer() {
}

private:
// Intermediate vectors used by the analysis class.
std::vector<double> filter_bank_output_;
std::vector<double> spectrum_;
std::vector<double> cepstrum_;

// Input/output vectors (real and imaginary parts), presumably for the
// inverse Fourier transform step -- confirm in the implementation file.
std::vector<double> real_part_input_;
std::vector<double> real_part_output_;
std::vector<double> imag_part_input_;
std::vector<double> imag_part_output_;

// Scratch buffer for the Levinson-Durbin recursion object.
LevinsonDurbinRecursion::Buffer buffer_for_levinson_durbin_recursion_;

friend class PerceptualLinearPredictiveCoefficientsAnalysis;
// Project macro; presumably disables copy construction and assignment.
DISALLOW_COPY_AND_ASSIGN(Buffer);
};

/**
* @param[in] fft_length Number of FFT bins, @f$N@f$.
* @param[in] num_channel Number of channels, @f$C@f$.
* @param[in] num_order Order of cepstral coefficients, @f$M@f$.
* @param[in] liftering_coefficient A parameter of liftering, @f$L@f$.
* @param[in] compression_factor Amplitude compression factor.
* @param[in] sampling_rate Sampling rate in Hz.
* @param[in] lowest_frequency Lowest frequency in Hz.
* @param[in] highest_frequency Highest frequency in Hz.
* @param[in] floor Floor value of raw filter-bank output.
*/
PerceptualLinearPredictiveCoefficientsAnalysis(
int fft_length, int num_channel, int num_order, int liftering_coefficient,
double compression_factor, double sampling_rate, double lowest_frequency,
double highest_frequency, double floor);

virtual ~PerceptualLinearPredictiveCoefficientsAnalysis() {
}

/**
* @return FFT length, as reported by the internal mel-filter-bank analysis.
*/
int GetFftLength() const {
return mel_filter_bank_analysis_.GetFftLength();
}

/**
* @return Number of channels, as reported by the internal mel-filter-bank
*         analysis.
*/
int GetNumChannel() const {
return mel_filter_bank_analysis_.GetNumChannel();
}

/**
* @return Order of cepstral coefficients, as reported by the internal
*         Levinson-Durbin recursion.
*/
int GetNumOrder() const {
return levinson_durbin_recursion_.GetNumOrder();
}

/**
* @return Liftering coefficient.
*/
int GetLifteringCoefficient() const {
return liftering_coefficient_;
}

/**
* @return Compression factor.
*/
double GetCompressionFactor() const {
return compression_factor_;
}

/**
* @return True if this object is valid.
*/
bool IsValid() const {
return is_valid_;
}

/**
* @param[in] power_spectrum @f$(N/2+1)@f$-length power spectrum.
* @param[out] plp @f$M@f$-th order PLP features.
* @param[out] energy Signal energy @f$E@f$ (optional).
* @param[out] buffer Buffer.
* @return True on success, false on failure.
*/
// NOTE(review): "optional" presumably means a null pointer may be passed for
// energy -- confirm against the implementation file.
bool Run(
const std::vector<double>& power_spectrum, std::vector<double>* plp,
double* energy,
PerceptualLinearPredictiveCoefficientsAnalysis::Buffer* buffer) const;

private:
// Values returned by GetLifteringCoefficient() and GetCompressionFactor().
const int liftering_coefficient_;
const double compression_factor_;

// Processing objects owned by this class. GetFftLength()/GetNumChannel()
// forward to the mel-filter-bank analysis and GetNumOrder() forwards to the
// Levinson-Durbin recursion.
const MelFilterBankAnalysis mel_filter_bank_analysis_;
const InverseFourierTransform inverse_fourier_transform_;
const LevinsonDurbinRecursion levinson_durbin_recursion_;
const LinearPredictiveCoefficientsToCepstrum
linear_predictive_coefficients_to_cepstrum_;

// Flag returned by IsValid().
bool is_valid_;

// NOTE(review): "cepstal" looks like a misspelling of "cepstral". The name is
// left unchanged here because the implementation file (not visible) refers to
// it; rename both together.
std::vector<double> equal_loudness_curve_;
std::vector<double> cepstal_weights_;

DISALLOW_COPY_AND_ASSIGN(PerceptualLinearPredictiveCoefficientsAnalysis);
};

} // namespace sptk

#endif // SPTK_ANALYSIS_PERCEPTUAL_LINEAR_PREDICTIVE_COEFFICIENTS_ANALYSIS_H_
3 changes: 3 additions & 0 deletions include/SPTK/math/discrete_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class DiscreteFourierTransform {

bool is_valid_;

std::vector<double> sine_table_;
std::vector<double> cosine_table_;

DISALLOW_COPY_AND_ASSIGN(DiscreteFourierTransform);
};

Expand Down
2 changes: 1 addition & 1 deletion include/SPTK/math/fast_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace sptk {

/**
* Calculate DFT of complex-valued input data.
* Calculate FFT of complex-valued input data.
*
* The inputs are @f$M@f$-th order complex-valued data:
* @f[
Expand Down
4 changes: 2 additions & 2 deletions include/SPTK/math/fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ class FourierTransform {
}

/**
* @param[in,out] real_part Real part.
* @param[in,out] imag_part Imaginary part.
* @param[in,out] real_part @f$L@f$-length real part.
* @param[in,out] imag_part @f$L@f$-length imaginary part.
* @return True on success, false on failure.
*/
bool Run(std::vector<double>* real_part,
Expand Down
106 changes: 106 additions & 0 deletions include/SPTK/math/inverse_discrete_fourier_transform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// ------------------------------------------------------------------------ //
// Copyright 2021 SPTK Working Group //
// //
// Licensed under the Apache License, Version 2.0 (the "License"); //
// you may not use this file except in compliance with the License. //
// You may obtain a copy of the License at //
// //
// http://www.apache.org/licenses/LICENSE-2.0 //
// //
// Unless required by applicable law or agreed to in writing, software //
// distributed under the License is distributed on an "AS IS" BASIS, //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and //
// limitations under the License. //
// ------------------------------------------------------------------------ //

#ifndef SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
#define SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_

#include <vector> // std::vector

#include "SPTK/math/discrete_fourier_transform.h"
#include "SPTK/utils/sptk_utils.h"

namespace sptk {

/**
* Calculate inverse DFT of complex-valued input data.
*
* The inputs are @f$L@f$-length complex-valued data:
* @f[
* \begin{array}{cccc}
* \mathrm{Re}(X(0)), & \mathrm{Re}(X(1)), & \ldots, & \mathrm{Re}(X(L-1)), \\
* \mathrm{Im}(X(0)), & \mathrm{Im}(X(1)), & \ldots, & \mathrm{Im}(X(L-1)).
* \end{array}
* @f]
* The outputs are
* @f[
* \begin{array}{cccc}
* \mathrm{Re}(x(0)), & \mathrm{Re}(x(1)), & \ldots, & \mathrm{Re}(x(L-1)), \\
* \mathrm{Im}(x(0)), & \mathrm{Im}(x(1)), & \ldots, & \mathrm{Im}(x(L-1)).
* \end{array}
* @f]
* They are computed as
* @f[
* x(n) = \frac{1}{L} \sum_{k=0}^{L-1} X(k) e^{j2\pi nk / L}.
* @f]
*/
class InverseDiscreteFourierTransform {
public:
/**
* @param[in] dft_length DFT length, @f$L@f$.
*/
explicit InverseDiscreteFourierTransform(int dft_length);

virtual ~InverseDiscreteFourierTransform() {
}

/**
* @return DFT length.
*/
int GetDftLength() const {
return dft_length_;
}

/**
* @return True if this object is valid.
*/
bool IsValid() const {
return is_valid_;
}

/**
* Out-of-place variant: the inputs are read-only and the results are written
* to separate output vectors.
*
* @param[in] real_part_input @f$L@f$-length real part of input.
* @param[in] imag_part_input @f$L@f$-length imaginary part of input.
* @param[out] real_part_output @f$L@f$-length real part of output.
* @param[out] imag_part_output @f$L@f$-length imaginary part of output.
* @return True on success, false on failure.
*/
bool Run(const std::vector<double>& real_part_input,
const std::vector<double>& imag_part_input,
std::vector<double>* real_part_output,
std::vector<double>* imag_part_output) const;

/**
* In-place variant: the given vectors serve as both input and output.
*
* @param[in,out] real_part @f$L@f$-length real part.
* @param[in,out] imag_part @f$L@f$-length imaginary part.
* @return True on success, false on failure.
*/
bool Run(std::vector<double>* real_part,
std::vector<double>* imag_part) const;

private:
// Value returned by GetDftLength().
const int dft_length_;

// Flag returned by IsValid().
bool is_valid_;

// Presumably sine/cosine values precomputed for the transform kernel --
// confirm in the implementation file.
std::vector<double> sine_table_;
std::vector<double> cosine_table_;

DISALLOW_COPY_AND_ASSIGN(InverseDiscreteFourierTransform);
};

} // namespace sptk

#endif // SPTK_MATH_INVERSE_DISCRETE_FOURIER_TRANSFORM_H_
2 changes: 1 addition & 1 deletion include/SPTK/math/inverse_fast_fourier_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
namespace sptk {

/**
* Calculate inverse DFT of complex-valued input data.
* Calculate inverse FFT of complex-valued input data.
*
* The inputs are @f$M@f$-th order complex-valued data:
* @f[
Expand Down
Loading