Merge pull request #338 from iver56/ij/avoid-limiter-delay

Avoid delay (zeros in the start) in Limiter output
iver56 · Apr 29, 2024 · 956e3ca · 956e3ca
2 parents 21dbc43 + bbb8e53
commit 956e3ca
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 13 deletions.
diff --git a/audiomentations/augmentations/limiter.py b/audiomentations/augmentations/limiter.py
@@ -1,12 +1,15 @@
+import math
 import random
+import sys
 
-import math
 import numpy as np
-import sys
 from numpy.typing import NDArray
 
 from audiomentations.core.transforms_interface import BaseWaveformTransform
-from audiomentations.core.utils import convert_decibels_to_amplitude_ratio, get_max_abs_amplitude
+from audiomentations.core.utils import (
+    convert_decibels_to_amplitude_ratio,
+    get_max_abs_amplitude,
+)
 
 
 class Limiter(BaseWaveformTransform):
@@ -129,18 +132,24 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
             delay=self.parameters["delay"],
             threshold=self.parameters["threshold"],
         )
+
         if samples.ndim == 1:
-            processed_samples = np.copy(samples)
+            processed_samples = np.pad(samples, (0, self.parameters["delay"]))
             limiter.limit_inplace(processed_samples)
+            processed_samples = processed_samples[self.parameters["delay"] - 1 : -1]
         else:
             # By default, there is no interchannel linking. The channels are processed
             # independently. Support for linking may be added in the future:
             # https://github.com/pzelasko/cylimiter/issues/4
             processed_samples = np.copy(samples)
             for chn_idx in range(samples.shape[0]):
                 limiter.reset()
-                channel = np.ascontiguousarray(processed_samples[chn_idx, :])
+                channel = np.ascontiguousarray(
+                    np.pad(processed_samples[chn_idx, :], (0, self.parameters["delay"]))
+                )
                 limiter.limit_inplace(channel)
-                processed_samples[chn_idx, :] = channel
+                processed_samples[chn_idx, :] = channel[
+                    self.parameters["delay"] - 1 : -1
+                ]
 
         return processed_samples
diff --git a/docs/waveform_transforms/limiter.md b/docs/waveform_transforms/limiter.md
@@ -4,12 +4,13 @@ _Added in v0.26.0_
 
 The `Limiter`, based on [cylimiter :octicons-link-external-16:](https://github.com/pzelasko/cylimiter){target=_blank}, is a straightforward audio transform that applies dynamic range compression.
 It is capable of limiting the audio signal based on certain parameters.
-Additionally, please note that this transform introduces a slight delay in the signal, equivalent to a fraction of the attack time.
 
 * The _threshold_ determines the audio level above which the limiter kicks in.
 * The _attack_ time is how quickly the limiter kicks in once the audio signal starts exceeding the threshold.
 * The _release_ time determines how quickly the limiter stops working after the signal drops below the threshold.
 
+:warning: In audiomentations v0.35.0 and earlier, this transform introduced a delay in the signal, equivalent to ~60% of the attack time. Starting from v0.36.0, the output is aligned with the input, i.e. there is no delay.
+
 ## Input-output example
 
 In this example we apply the limiter with a threshold that is 10 dB lower than the signal peak

diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ audioread==2.1.9
 black
 coverage==7.3.4
 cylimiter==0.3.0
+fast-align-audio==0.3.0
 lameenc==1.4.2
 librosa==0.10.0.post2
 matplotlib>=3.0.0,<4

diff --git a/tests/test_limiter.py b/tests/test_limiter.py
@@ -1,23 +1,43 @@
 import json
 import random
 
+import fast_align_audio
 import numpy as np
 import pytest
 
 from audiomentations import Limiter
 
 
 class TestLimiter:
-    @pytest.mark.parametrize("samples_in", [
-        np.random.normal(0, 1, size=1024).astype(np.float32),
-        np.random.normal(0, 0.001, size=(1, 50)).astype(np.float32),
-        np.random.normal(0, 0.1, size=(3, 8888)).astype(np.float32)
-    ])
+    @pytest.mark.parametrize(
+        "samples_in",
+        [
+            np.random.normal(0, 1, size=1000).astype(np.float32),
+            np.random.normal(0, 0.001, size=(1, 250)).astype(np.float32),
+            np.random.normal(0, 0.1, size=(3, 8888)).astype(np.float32),
+        ],
+    )
     def test_limiter(self, samples_in):
-        augmenter = Limiter(p=1.0)
+        augmenter = Limiter(p=1.0, min_attack=0.0025, max_attack=0.0025)
         std_in = np.mean(np.abs(samples_in))
         samples_out = augmenter(samples=samples_in, sample_rate=16000)
         std_out = np.mean(np.abs(samples_out))
+        length = samples_in.shape[-1]
+
+        samples_in_mono = samples_in
+        samples_out_mono = samples_out
+        if samples_in_mono.ndim > 1:
+            samples_in_mono = samples_in_mono[0]
+            samples_out_mono = samples_out_mono[0]
+        offset, _ = fast_align_audio.find_best_alignment_offset(
+            reference_signal=samples_in_mono,
+            delayed_signal=samples_out_mono,
+            max_offset_samples=length // 2,
+            lookahead_samples=length // 2,
+        )
+        # Check that the output is aligned with the input, i.e. no delay was introduced
+        assert offset == 0
+
         assert samples_out.dtype == np.float32
         assert samples_out.shape == samples_in.shape
         assert std_out < std_in