Remove the QUTNoise-based home-noise augmentations (AddHomeNoise, EmbeddedHomeNoise)

MinaKh · MinaKh · commit 628bb5579c2b · 2023-12-05T14:08:24.000+01:00
diff --git a/tonic/audio_augmentations.py b/tonic/audio_augmentations.py
@@ -1,14 +1,10 @@
-import os
 import random
 from dataclasses import dataclass, field
-from typing import Optional
 
 import librosa
 import numpy as np
 import torch
 import torchaudio
-
-# from qut_noise import QUTNoise
 from torchaudio.utils import download_asset
 
 from tonic.audio_transforms import FixLength
@@ -18,8 +14,6 @@
     "RandomPitchShift",
     "RandomAmplitudeScale",
     "AddWhiteNoise",
-    "AddHomeNoise",
-    "EmbeddedHomeNoise",
     "RIR",
 ]
 
@@ -174,197 +168,6 @@ def __call__(self, audio: np.ndarray):
         return noisy_audio
 
 
-# @dataclass
-# class AddHomeNoise:
-#     """Add a home background noise (from QUTNOise dataset) to the audio sample with a known snr
-#     (signal to noise ratio).
-
-#     Parameters:
-#         sample_length (int): sample length in seconds
-#         target_sr (float): the target sample rate of the mixed final signal (default to the higher sample rate, between sample rates of noise and data )
-#         params_dataset (dict): containing other parameters of the noise dataset
-#         orig_sr (float): original sample rate of data
-#         factors (float): range of desired snrs
-#         partition (str): partition of the QUTNoise dataset that is used for noise augmentation
-#         aug_index (int): index of the chosen factor for snr. It will be randomly chosen from the desired range (if not passed while initilization)
-#         caching (bool): if we are caching the DiskCached dataset will dynamically pass copy index of data item to the transform (to set aug_index). Otherwise the aug_index will be chosen randomly in every call of transform
-#         seed (int): a fixed seed for reproducibility
-#     Args:
-#         audio (np.ndarray): data sample
-#     Returns:
-#         np.ndarray: data sample with added noise
-#     """
-
-#     sample_length: int
-#     params_dataset: dict
-#     target_sr: float = 48000
-#     orig_sr: float = 16000
-#     factors: list = field(default_factory=lambda: [0, 10, 20])
-#     partition: str = "test"
-#     aug_index: int = 0
-#     caching: bool = False
-#     seed: int = 123
-
-#     def __post_init__(self):
-#         random.seed(self.seed)
-
-#         noises = QUTNoise(
-#             classes=["HOME"],
-#             create_splits=False,
-#             duration_split=[self.sample_length],
-#             partition=self.partition,
-#             **self.params_dataset,
-#         )
-
-#         split_qutnoise_path = noises.config_path
-
-#         self.wave_files_path = (
-#             str(split_qutnoise_path)
-#             + "/splits_"
-#             + str(self.sample_length)
-#             + "s"
-#             + "/"
-#             + self.partition
-#             + "/"
-#         )
-
-#         self.home_noises = os.listdir(self.wave_files_path)
-
-#     def resample(self, audio):
-#         audio_resampled = librosa.resample(
-#             audio, orig_sr=self.orig_sr, target_sr=self.target_sr
-#         )
-#         return audio_resampled
-
-#     def get_noise(self):
-#         self.noise_wave = random.choice(self.home_noises)
-
-#         noise, _ = librosa.core.load(
-#             self.wave_files_path + self.noise_wave, sr=self.target_sr
-#         )
-#         self.noise = noise[0 : int(self.target_sr) * self.sample_length]
-#         return self.noise
-
-#     def add_noise(
-#         self,
-#         waveform: torch.Tensor,
-#         noise: torch.Tensor,
-#         snr: torch.Tensor,
-#     ) -> torch.Tensor:
-#         """Scales and adds noise to waveform per signal-to-noise ratio.
-
-#         Specifically, for each pair of waveform vector :math:`x \in \mathbb{R}^L` and noise vector
-#         :math:`n \in \mathbb{R}^L`, the function computes output :math:`y` as
-#         .. math::
-#             y = x + a n \, \text{,}
-#         where
-#         .. math::
-#             a = \sqrt{ \frac{ ||x||_{2}^{2} }{ ||n||_{2}^{2} } \cdot 10^{-\frac{\text{SNR}}{10}} } \, \text{,}
-#         with :math:`\text{SNR}` being the desired signal-to-noise ratio between :math:`x` and :math:`n`, in dB.
-#         Note that this function broadcasts singleton leading dimensions in its inputs in a manner that is
-#         consistent with the above formulae and PyTorch's broadcasting semantics.
-#         .. devices:: CPU CUDA
-#         .. properties:: Autograd TorchScript
-#         Args:
-#             waveform (torch.Tensor): Input waveform, with shape `(..., L)`.
-#             noise (torch.Tensor): Noise, with shape `(..., L)` (same shape as ``waveform``).
-#             snr (torch.Tensor): Signal-to-noise ratios in dB, with shape `(...,)`.
-#         Returns:
-#             torch.Tensor: Result of scaling and adding ``noise`` to ``waveform``, with shape `(..., L)`
-#             (same shape as ``waveform``).
-#         """
-
-#         L = waveform.size(-1)
-
-#         if L != noise.size(-1):
-#             raise ValueError(
-#                 f"Length dimensions of waveform and noise don't match (got {L} and {noise.size(-1)})."
-#             )
-
-#         # compute scale, second by second
-#         noisy_audio = torch.zeros_like(waveform)
-#         for i in range(0, self.sample_length):
-#             start, end = int(i * self.target_sr), int((i + 1) * self.target_sr)
-#             sig, noise_ = waveform[:, start:end], noise[:, start:end]
-
-#             energy_signal = torch.linalg.vector_norm(sig, ord=2, dim=-1) ** 2  # (*,)
-#             energy_noise = torch.linalg.vector_norm(noise_, ord=2, dim=-1) ** 2  # (*,)
-#             original_snr_db = 10 * (
-#                 torch.log10(energy_signal) - torch.log10(energy_noise)
-#             )
-#             scale = 10 ** ((original_snr_db - snr) / 20.0)  # (*,)
-
-#             # scale noise
-#             self.scaled_noise = scale.unsqueeze(-1) * noise_  # (*, 1) * (*, L) = (*, L)
-#             noisy_audio[:, start:end] = sig + self.scaled_noise
-
-#         return noisy_audio
-
-#     def __call__(self, audio: np.ndarray):
-#         if not self.caching:
-#             self.aug_index = random.choice(range(0, len(self.factors)))
-#         snr_db = torch.tensor([self.factors[self.aug_index]])
-#         self.noise = torch.from_numpy(self.get_noise())
-#         self.noise = torch.unsqueeze(self.noise, dim=0)
-#         self.resampled_audio = torch.from_numpy(self.resample(audio))
-#         noisy_audio = self.add_noise(self.resampled_audio, self.noise, snr_db)
-
-#         return noisy_audio.detach().numpy()
-
-
-# @dataclass
-# class EmbeddedHomeNoise(AddHomeNoise):
-#     """Add a home background noise (from QUTNOise dataset) to the data sample with a known snr_db
-#     (signal to noise ratio).
-
-#     The difference with AddHomeNoise is that a leading (/and trainling) noise will be added to the augmented sample.
-#     Parameters:
-#         noise_length (int): the length of noise (in seconds) that will be added to the sample
-#         two_sided (bool): if True the augmented signal will be encompassed between leading and trailing noises
-#     Args:
-#         audio (np.ndarray): data sample
-#     Returns:
-#         np.ndarray: data sample with added noise at the begining
-#     """
-
-#     noise_length: int = None
-#     two_sided: bool = False
-
-#     def __post_init__(self):
-#         super().__post_init__()
-
-#         if self.noise_length is None:
-#             raise ValueError("noise length is not specified")
-#         elif self.noise_length > self.sample_length:
-#             raise ValueError(
-#                 "in the current implementation length of noise can't exceed sample length"
-#             )
-
-#     def __call__(self, audio: np.ndarray):
-#         if not self.caching:
-#             self.aug_index = random.choice(range(0, len(self.factors)))
-#         snr_db = torch.tensor([self.factors[self.aug_index]])
-
-#         self.noise = torch.from_numpy(self.get_noise())
-#         self.noise = torch.unsqueeze(self.noise, dim=0)
-#         self.resampled_audio = torch.from_numpy(self.resample(audio))
-#         noisy_audio = (
-#             self.add_noise(self.resampled_audio, self.noise, snr_db).detach().numpy()
-#         )
-
-#         initial_noise = self.scaled_noise[
-#             :, 0 : int(self.target_sr * self.noise_length)
-#         ]
-#         if self.two_sided:
-#             noise_then_audio = np.concatenate(
-#                 (initial_noise, noisy_audio, initial_noise), axis=1
-#             )
-#         else:
-#             noise_then_audio = np.concatenate((initial_noise, noisy_audio), axis=1)
-
-#         return noise_then_audio
-
-
 @dataclass
 class RIR:
     """Convolves a RIR (room impluse response) to the data sample.