Skip to content

Commit

Permalink
Merge pull request #338 from iver56/ij/avoid-limiter-delay
Browse files Browse the repository at this point in the history
Avoid delay (zeros in the start) in Limiter output
  • Loading branch information
iver56 authored Apr 29, 2024
2 parents 21dbc43 + bbb8e53 commit 956e3ca
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 13 deletions.
21 changes: 15 additions & 6 deletions audiomentations/augmentations/limiter.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import math
import random
import sys

import math
import numpy as np
import sys
from numpy.typing import NDArray

from audiomentations.core.transforms_interface import BaseWaveformTransform
from audiomentations.core.utils import convert_decibels_to_amplitude_ratio, get_max_abs_amplitude
from audiomentations.core.utils import (
convert_decibels_to_amplitude_ratio,
get_max_abs_amplitude,
)


class Limiter(BaseWaveformTransform):
Expand Down Expand Up @@ -129,18 +132,24 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
delay=self.parameters["delay"],
threshold=self.parameters["threshold"],
)

if samples.ndim == 1:
processed_samples = np.copy(samples)
processed_samples = np.pad(samples, (0, self.parameters["delay"]))
limiter.limit_inplace(processed_samples)
processed_samples = processed_samples[self.parameters["delay"] - 1 : -1]
else:
# By default, there is no interchannel linking. The channels are processed
# independently. Support for linking may be added in the future:
# https://github.com/pzelasko/cylimiter/issues/4
processed_samples = np.copy(samples)
for chn_idx in range(samples.shape[0]):
limiter.reset()
channel = np.ascontiguousarray(processed_samples[chn_idx, :])
channel = np.ascontiguousarray(
np.pad(processed_samples[chn_idx, :], (0, self.parameters["delay"]))
)
limiter.limit_inplace(channel)
processed_samples[chn_idx, :] = channel
processed_samples[chn_idx, :] = channel[
self.parameters["delay"] - 1 : -1
]

return processed_samples
3 changes: 2 additions & 1 deletion docs/waveform_transforms/limiter.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ _Added in v0.26.0_

The `Limiter`, based on [cylimiter :octicons-link-external-16:](https://github.com/pzelasko/cylimiter){target=_blank}, is a straightforward audio transform that applies dynamic range compression.
It is capable of limiting the audio signal based on certain parameters.
Additionally, please note that this transform introduces a slight delay in the signal, equivalent to a fraction of the attack time.

* The _threshold_ determines the audio level above which the limiter kicks in.
* The _attack_ time is how quickly the limiter kicks in once the audio signal starts exceeding the threshold.
* The _release_ time determines how quickly the limiter stops working after the signal drops below the threshold.

:warning: In audiomentations v0.35.0 and earlier, this transform introduced a delay in the signal, equivalent to ~60% of the attack time. Starting from v0.36.0, the output is aligned with the input, i.e. no delay.

## Input-output example

In this example we apply the limiter with a threshold that is 10 dB lower than the signal peak
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ audioread==2.1.9
black
coverage==7.3.4
cylimiter==0.3.0
fast-align-audio==0.3.0
lameenc==1.4.2
librosa==0.10.0.post2
matplotlib>=3.0.0,<4
Expand Down
32 changes: 26 additions & 6 deletions tests/test_limiter.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,43 @@
import json
import random

import fast_align_audio
import numpy as np
import pytest

from audiomentations import Limiter


class TestLimiter:
@pytest.mark.parametrize("samples_in", [
np.random.normal(0, 1, size=1024).astype(np.float32),
np.random.normal(0, 0.001, size=(1, 50)).astype(np.float32),
np.random.normal(0, 0.1, size=(3, 8888)).astype(np.float32)
])
@pytest.mark.parametrize(
"samples_in",
[
np.random.normal(0, 1, size=1000).astype(np.float32),
np.random.normal(0, 0.001, size=(1, 250)).astype(np.float32),
np.random.normal(0, 0.1, size=(3, 8888)).astype(np.float32),
],
)
def test_limiter(self, samples_in):
augmenter = Limiter(p=1.0)
augmenter = Limiter(p=1.0, min_attack=0.0025, max_attack=0.0025)
std_in = np.mean(np.abs(samples_in))
samples_out = augmenter(samples=samples_in, sample_rate=16000)
std_out = np.mean(np.abs(samples_out))
length = samples_in.shape[-1]

samples_in_mono = samples_in
samples_out_mono = samples_out
if samples_in_mono.ndim > 1:
samples_in_mono = samples_in_mono[0]
samples_out_mono = samples_out_mono[0]
offset, _ = fast_align_audio.find_best_alignment_offset(
reference_signal=samples_in_mono,
delayed_signal=samples_out_mono,
max_offset_samples=length // 2,
lookahead_samples=length // 2,
)
# Check that the output is aligned with the input, i.e. no delay was introduced
assert offset == 0

assert samples_out.dtype == np.float32
assert samples_out.shape == samples_in.shape
assert std_out < std_in
Expand Down

0 comments on commit 956e3ca

Please sign in to comment.