Skip to content

Commit

Permalink
maintenance
Browse files Browse the repository at this point in the history
  • Loading branch information
takenori-y committed Nov 12, 2024
1 parent 3dbad2d commit d5420df
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ jobs:
torch: 2.0.0
torchaudio: 2.0.1
- python: 3.12
torch: 2.5.0
torchaudio: 2.5.0
torch: 2.5.1
torchaudio: 2.5.1

steps:
- name: Clone
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![Stable Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/2.2.0/)
[![Downloads](https://static.pepy.tech/badge/diffsptk)](https://pepy.tech/project/diffsptk)
[![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyTorch Version](https://img.shields.io/badge/pytorch-2.0.0%20%7C%202.5.0-orange.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyTorch Version](https://img.shields.io/badge/pytorch-2.0.0%20%7C%202.5.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
[![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk)
[![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
Expand Down
21 changes: 21 additions & 0 deletions diffsptk/misc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,27 @@ def remove_gain(a, value=1, return_gain=False):
return ret


def get_resample_params(mode="kaiser_best"):
    """Return the resampling parameters associated with a named preset.

    Parameters
    ----------
    mode : ['kaiser_best', 'kaiser_fast']
        The name of the preset.

    Returns
    -------
    out : dict
        A freshly built dictionary holding ``lowpass_filter_width``, ``rolloff``,
        ``resampling_method``, and ``beta``. The caller may modify it freely.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported presets.

    """
    # From https://pytorch.org/audio/stable/tutorials/audio_resampling_tutorial.html
    presets = {
        "kaiser_best": {
            "lowpass_filter_width": 64,
            "rolloff": 0.9475937167399596,
            "resampling_method": "sinc_interp_kaiser",
            "beta": 14.769656459379492,
        },
        "kaiser_fast": {
            "lowpass_filter_width": 16,
            "rolloff": 0.85,
            "resampling_method": "sinc_interp_kaiser",
            "beta": 8.555504641634386,
        },
    }
    try:
        params = presets[mode]
    except (KeyError, TypeError):
        # TypeError covers unhashable values so that every unsupported mode is
        # reported uniformly as ValueError, naming the rejected value.
        raise ValueError(
            f"Only kaiser_best and kaiser_fast are supported, but got {mode!r}."
        ) from None
    return params


def get_alpha(sr, mode="hts", n_freq=10, n_alpha=100):
"""Compute an appropriate frequency warping factor under given sample rate.
Expand Down
7 changes: 7 additions & 0 deletions diffsptk/modules/cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

from ..misc.utils import Lambda
from ..misc.utils import delayed_import
from ..misc.utils import get_resample_params
from ..misc.utils import numpy_to_torch
from .stft import ShortTimeFourierTransform as STFT

Expand Down Expand Up @@ -79,6 +80,9 @@ class ConstantQTransform(nn.Module):
scale : bool
If True, scale the CQT response by the length of the filter.
res_type : ['kaiser_best', 'kaiser_fast'] or None
Resampling type.
**kwargs : additional keyword arguments
See `torchaudio.transforms.Resample
<https://pytorch.org/audio/main/generated/torchaudio.transforms.Resample.html>`_.
Expand All @@ -99,6 +103,7 @@ def __init__(
sparsity=1e-2,
window="hann",
scale=True,
res_type="kaiser_best",
**kwargs,
):
super().__init__()
Expand Down Expand Up @@ -144,6 +149,8 @@ def __init__(
downsample_count = early_downsample_count(
sample_rate * 0.5, filter_cutoff, frame_period, n_octave
)
if res_type is not None:
kwargs.update(get_resample_params(res_type))
if 0 < downsample_count:
downsample_factor = 2**downsample_count
early_downsample.append(
Expand Down
15 changes: 11 additions & 4 deletions diffsptk/modules/excite.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,20 @@ def _forward(p, frame_period, voiced_region, unvoiced_region, polarity):
unipolar = polarity == "unipolar"
e = torch.zeros_like(p)
if voiced_region == "pulse":
r = torch.ceil(phase)
r = F.pad(r, (1, 0))
pulse_pos = torch.ge(torch.diff(r), 1)

def get_pulse_pos(p):
r = torch.ceil(p)
r = F.pad(r, (1, 0))
return torch.ge(torch.diff(r), 1)

if unipolar:
pulse_pos = get_pulse_pos(phase)
e[pulse_pos] = torch.sqrt(p[pulse_pos])
else:
raise RuntimeError
pulse_pos1 = get_pulse_pos(phase)
pulse_pos2 = get_pulse_pos(0.5 * phase)
e[pulse_pos1] = torch.sqrt(p[pulse_pos1])
e[pulse_pos1 & ~pulse_pos2] *= -1
elif voiced_region == "sinusoidal":
if unipolar:
e[mask] = 0.5 * (1 - torch.cos(TWO_PI * phase[mask]))
Expand Down
8 changes: 8 additions & 0 deletions diffsptk/modules/icqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import torchaudio

from ..misc.utils import delayed_import
from ..misc.utils import get_resample_params
from ..misc.utils import numpy_to_torch
from .istft import InverseShortTimeFourierTransform as ISTFT

Expand Down Expand Up @@ -78,6 +79,9 @@ class InverseConstantQTransform(nn.Module):
scale : bool
If True, scale the CQT response by the length of the filter.
res_type : ['kaiser_best', 'kaiser_fast'] or None
Resampling type.
**kwargs : additional keyword arguments
See `torchaudio.transforms.Resample
<https://pytorch.org/audio/main/generated/torchaudio.transforms.Resample.html>`_.
Expand All @@ -98,6 +102,7 @@ def __init__(
sparsity=1e-2,
window="hann",
scale=True,
res_type="kaiser_best",
**kwargs,
):
super().__init__()
Expand Down Expand Up @@ -156,6 +161,9 @@ def __init__(
transforms = []
resamplers = []

if res_type is not None:
kwargs.update(get_resample_params(res_type))

for i in range(n_octave):
n_filter = min(B, K - B * i)
sl = slice(B * i, B * i + n_filter)
Expand Down
7 changes: 4 additions & 3 deletions tests/test_cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
@pytest.mark.parametrize("fp", [511, 512])
@pytest.mark.parametrize("K", [1, 24])
@pytest.mark.parametrize("scale", [False, True])
def test_compatibility(device, fp, K, scale, B=12, f_min=32.7):
@pytest.mark.parametrize("res_type", ["kaiser_best", "kaiser_fast"])
def test_compatibility(device, fp, K, scale, res_type, B=12, f_min=32.7):
if device == "cuda" and not torch.cuda.is_available():
return

Expand All @@ -45,12 +46,12 @@ def test_compatibility(device, fp, K, scale, B=12, f_min=32.7):
bins_per_octave=B,
hop_length=fp,
scale=scale,
res_type="kaiser_best",
res_type=res_type,
dtype=None,
).T

cqt = diffsptk.CQT(
fp, sr, f_min=f_min, n_bin=K, n_bin_per_octave=B, scale=scale
fp, sr, f_min=f_min, n_bin=K, n_bin_per_octave=B, scale=scale, res_type=res_type
).to(device)
c2 = cqt(x).cpu().numpy()

Expand Down
7 changes: 3 additions & 4 deletions tests/test_excite.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,14 @@ def compute_error(infile):
)
@pytest.mark.parametrize("polarity", ["unipolar", "bipolar"])
def test_waveform(voiced_region, polarity, P=80, verbose=False):
if voiced_region == "pulse" and polarity == "bipolar":
return

excite = diffsptk.ExcitationGeneration(
P, voiced_region=voiced_region, unvoiced_region="zeros", polarity=polarity
)
pitch = torch.from_numpy(
U.call("x2x +sd tools/SPTK/asset/data.short | " f"pitch -s 16 -p {P} -o 0 -a 2")
U.call(f"x2x +sd tools/SPTK/asset/data.short | pitch -s 16 -p {P} -o 0 -a 2")
)
e = excite(pitch)
if voiced_region == "pulse":
e = e / e.abs().max()
if verbose:
sf.write(f"excite_{voiced_region}_{polarity}.wav", e, 16000)

0 comments on commit d5420df

Please sign in to comment.