Move faded time mask transformation from a separate class (SmoothFadeTimeMask) to a `fade` parameter of the regular TimeMask

askskro · askskro · commit c83640ff2c06 · 2019-10-31T22:39:24.000+01:00
diff --git a/audiomentations/augmentations/transforms.py b/audiomentations/augmentations/transforms.py
@@ -84,17 +84,19 @@ def apply(self, samples, sample_rate):
 class TimeMask(BasicTransform):
     """Mask some time band on the spectrogram. Inspired by https://arxiv.org/pdf/1904.08779.pdf """
 
-    def __init__(self, min_band_part=0.0, max_band_part=0.5, p=0.5):
+    def __init__(self, min_band_part=0.0, max_band_part=0.5, fade=False, p=0.5):
         """
         :param min_band_part: Minimum length of the silent part as a fraction of the
             total sound length. Float.
         :param max_band_part: Maximum length of the silent part as a fraction of the
             total sound length. Float.
+        :param fade: Bool. If True, apply linear fades at both edges of the silent part.
         :param p:
         """
         super().__init__(p)
         self.min_band_part = min_band_part
         self.max_band_part = max_band_part
+        self.fade = fade
 
     def apply(self, samples, sample_rate):
         new_samples = samples.copy()
@@ -103,41 +105,12 @@ def apply(self, samples, sample_rate):
             int(new_samples.shape[0] * self.max_band_part),
         )
         _t0 = random.randint(0, new_samples.shape[0] - _t)
-        new_samples[_t0 : _t0 + _t] = 0
-        return new_samples
-
-
-class SmoothFadeTimeMask(BasicTransform):
-    """Mask some time band on the spectrogram with fade in and fade out.
-
-    Same transformation as TimeMask but with linear smoothing"""
-
-    def __init__(self, min_band_part=0.0, max_band_part=0.5, p=0.5):
-        """
-        :param min_band_part: Minimum length of the silent part as a fraction of the
-            total sound length. Float.
-        :param max_band_part: Maximum length of the silent part as a fraction of the
-            total sound length. Float.
-        :param p:
-        """
-        super().__init__(p)
-        self.min_band_part = min_band_part
-        self.max_band_part = max_band_part
-
-    def apply(self, samples, sample_rate):
-        new_samples = samples.copy()
-        _t = random.randint(
-            int(new_samples.shape[0] * self.min_band_part),
-            int(new_samples.shape[0] * self.max_band_part),
-        )
-        _t0 = random.randint(0, new_samples.shape[0] - _t)
-        # fade length is 10 ms or 10% of silent part if silent part is less than 10 ms
-        fade_length = min(int(sample_rate * 0.01), int(_t * 0.1))
-        linear_fade_in = np.linspace(0, 1, num=fade_length)
-        linear_fade_out = np.linspace(1, 0, num=fade_length)
-        new_samples[_t0 : _t0 + fade_length] *= linear_fade_out
-        new_samples[_t0 + _t - fade_length : _t0 + _t] *= linear_fade_in
-        new_samples[_t0 + fade_length : _t0 + _t - fade_length] = 0
+        mask = np.zeros(_t)
+        if self.fade:  # fade_length may be 0; slice the tail from _t, never [-0:]
+            fade_length = min(int(sample_rate * 0.01), int(_t * 0.1))
+            mask[0:fade_length] = np.linspace(1, 0, num=fade_length)
+            mask[_t - fade_length :] = np.linspace(0, 1, num=fade_length)
+        new_samples[_t0 : _t0 + _t] *= mask
         return new_samples
 
 
diff --git a/demo/demo.py b/demo/demo.py
@@ -13,7 +13,6 @@
     AddImpulseResponse,
     FrequencyMask,
     TimeMask,
-    SmoothFadeTimeMask,
     AddGaussianSNR,
     Resample,
     ClippingDistortion,
@@ -75,15 +74,6 @@ def load_wav_file(sound_file_path):
         augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
         wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
 
-    # SmoothFadeTimeMask
-    augmenter = Compose([SmoothFadeTimeMask(p=1.0)])
-    for i in range(5):
-        output_file_path = os.path.join(
-            output_dir, "SmoothFadeTimeMask_{:03d}.wav".format(i)
-        )
-        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
-        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
-
     # AddGaussianSNR
     augmenter = Compose([AddGaussianSNR(p=1.0)])
     for i in range(5):
diff --git a/tests/test_smooth_fade_time_mask.py b/tests/test_smooth_fade_time_mask.py
diff --git a/tests/test_time_mask.py b/tests/test_time_mask.py
@@ -20,3 +20,35 @@ def test_dynamic_length(self):
         std_in = np.mean(np.abs(samples_in))
         std_out = np.mean(np.abs(samples_out))
         self.assertLess(std_out, std_in)
+
+    def test_dynamic_length_with_fade(self):
+        sample_len = 1024
+        samples_in = np.random.normal(0, 1, size=sample_len).astype(np.float32)
+        sample_rate = 16000
+        augmenter = Compose(
+            [TimeMask(min_band_part=0.2, max_band_part=0.5, fade=True, p=1.0)]
+        )
+        # The output must keep the input's dtype and length.
+        samples_out = augmenter(samples=samples_in, sample_rate=sample_rate)
+        self.assertEqual(samples_out.dtype, np.float32)
+        self.assertEqual(len(samples_out), sample_len)
+        # NOTE: despite the "std" names, these are mean absolute amplitudes.
+        std_in = np.mean(np.abs(samples_in))
+        std_out = np.mean(np.abs(samples_out))
+        self.assertLess(std_out, std_in)
+
+    def test_dynamic_length_with_fade_short_signal(self):
+        sample_len = 100
+        samples_in = np.random.normal(0, 1, size=sample_len).astype(np.float32)
+        sample_rate = 16000
+        augmenter = Compose(
+            [TimeMask(min_band_part=0.2, max_band_part=0.5, fade=True, p=1.0)]
+        )
+        # Signal is shorter than 10 ms at 16 kHz, so fade length is 10% of the mask.
+        samples_out = augmenter(samples=samples_in, sample_rate=sample_rate)
+        self.assertEqual(samples_out.dtype, np.float32)
+        self.assertEqual(len(samples_out), sample_len)
+        # NOTE: despite the "std" names, these are mean absolute amplitudes.
+        std_in = np.mean(np.abs(samples_in))
+        std_out = np.mean(np.abs(samples_out))
+        self.assertLess(std_out, std_in)