
Commit 628bb55

noise related augmentations removed
1 parent 04aac21 commit 628bb55

File tree

1 file changed (+0, -197 lines)


tonic/audio_augmentations.py

Lines changed: 0 additions & 197 deletions
@@ -1,14 +1,10 @@
-import os
 import random
 from dataclasses import dataclass, field
-from typing import Optional
 
 import librosa
 import numpy as np
 import torch
 import torchaudio
-
-# from qut_noise import QUTNoise
 from torchaudio.utils import download_asset
 
 from tonic.audio_transforms import FixLength
@@ -18,8 +14,6 @@
     "RandomPitchShift",
     "RandomAmplitudeScale",
     "AddWhiteNoise",
-    "AddHomeNoise",
-    "EmbeddedHomeNoise",
     "RIR",
 ]
 
@@ -174,197 +168,6 @@ def __call__(self, audio: np.ndarray):
         return noisy_audio
 
 
-# @dataclass
-# class AddHomeNoise:
-#     """Add a home background noise (from QUTNOise dataset) to the audio sample with a known snr
-#     (signal to noise ratio).
-
-#     Parameters:
-#         sample_length (int): sample length in seconds
-#         target_sr (float): the target sample rate of the mixed final signal (default to the higher sample rate, between sample rates of noise and data )
-#         params_dataset (dict): containing other parameters of the noise dataset
-#         orig_sr (float): original sample rate of data
-#         factors (float): range of desired snrs
-#         partition (str): partition of the QUTNoise dataset that is used for noise augmentation
-#         aug_index (int): index of the chosen factor for snr. It will be randomly chosen from the desired range (if not passed while initilization)
-#         caching (bool): if we are caching the DiskCached dataset will dynamically pass copy index of data item to the transform (to set aug_index). Otherwise the aug_index will be chosen randomly in every call of transform
-#         seed (int): a fixed seed for reproducibility
-#     Args:
-#         audio (np.ndarray): data sample
-#     Returns:
-#         np.ndarray: data sample with added noise
-#     """
-
-#     sample_length: int
-#     params_dataset: dict
-#     target_sr: float = 48000
-#     orig_sr: float = 16000
-#     factors: list = field(default_factory=lambda: [0, 10, 20])
-#     partition: str = "test"
-#     aug_index: int = 0
-#     caching: bool = False
-#     seed: int = 123
-
-#     def __post_init__(self):
-#         random.seed(self.seed)
-
-#         noises = QUTNoise(
-#             classes=["HOME"],
-#             create_splits=False,
-#             duration_split=[self.sample_length],
-#             partition=self.partition,
-#             **self.params_dataset,
-#         )
-
-#         split_qutnoise_path = noises.config_path
-
-#         self.wave_files_path = (
-#             str(split_qutnoise_path)
-#             + "/splits_"
-#             + str(self.sample_length)
-#             + "s"
-#             + "/"
-#             + self.partition
-#             + "/"
-#         )
-
-#         self.home_noises = os.listdir(self.wave_files_path)
-
-#     def resample(self, audio):
-#         audio_resampled = librosa.resample(
-#             audio, orig_sr=self.orig_sr, target_sr=self.target_sr
-#         )
-#         return audio_resampled
-
-#     def get_noise(self):
-#         self.noise_wave = random.choice(self.home_noises)
-
-#         noise, _ = librosa.core.load(
-#             self.wave_files_path + self.noise_wave, sr=self.target_sr
-#         )
-#         self.noise = noise[0 : int(self.target_sr) * self.sample_length]
-#         return self.noise
-
-#     def add_noise(
-#         self,
-#         waveform: torch.Tensor,
-#         noise: torch.Tensor,
-#         snr: torch.Tensor,
-#     ) -> torch.Tensor:
-#         """Scales and adds noise to waveform per signal-to-noise ratio.
-
-#         Specifically, for each pair of waveform vector :math:`x \in \mathbb{R}^L` and noise vector
-#         :math:`n \in \mathbb{R}^L`, the function computes output :math:`y` as
-#         .. math::
-#             y = x + a n \, \text{,}
-#         where
-#         .. math::
-#             a = \sqrt{ \frac{ ||x||_{2}^{2} }{ ||n||_{2}^{2} } \cdot 10^{-\frac{\text{SNR}}{10}} } \, \text{,}
-#         with :math:`\text{SNR}` being the desired signal-to-noise ratio between :math:`x` and :math:`n`, in dB.
-#         Note that this function broadcasts singleton leading dimensions in its inputs in a manner that is
-#         consistent with the above formulae and PyTorch's broadcasting semantics.
-#         .. devices:: CPU CUDA
-#         .. properties:: Autograd TorchScript
-#         Args:
-#             waveform (torch.Tensor): Input waveform, with shape `(..., L)`.
-#             noise (torch.Tensor): Noise, with shape `(..., L)` (same shape as ``waveform``).
-#             snr (torch.Tensor): Signal-to-noise ratios in dB, with shape `(...,)`.
-#         Returns:
-#             torch.Tensor: Result of scaling and adding ``noise`` to ``waveform``, with shape `(..., L)`
-#             (same shape as ``waveform``).
-#         """
-
-#         L = waveform.size(-1)
-
-#         if L != noise.size(-1):
-#             raise ValueError(
-#                 f"Length dimensions of waveform and noise don't match (got {L} and {noise.size(-1)})."
-#             )
-
-#         # compute scale, second by second
-#         noisy_audio = torch.zeros_like(waveform)
-#         for i in range(0, self.sample_length):
-#             start, end = int(i * self.target_sr), int((i + 1) * self.target_sr)
-#             sig, noise_ = waveform[:, start:end], noise[:, start:end]
-
-#             energy_signal = torch.linalg.vector_norm(sig, ord=2, dim=-1) ** 2  # (*,)
-#             energy_noise = torch.linalg.vector_norm(noise_, ord=2, dim=-1) ** 2  # (*,)
-#             original_snr_db = 10 * (
-#                 torch.log10(energy_signal) - torch.log10(energy_noise)
-#             )
-#             scale = 10 ** ((original_snr_db - snr) / 20.0)  # (*,)
-
-#             # scale noise
-#             self.scaled_noise = scale.unsqueeze(-1) * noise_  # (*, 1) * (*, L) = (*, L)
-#             noisy_audio[:, start:end] = sig + self.scaled_noise
-
-#         return noisy_audio
-
-#     def __call__(self, audio: np.ndarray):
-#         if not self.caching:
-#             self.aug_index = random.choice(range(0, len(self.factors)))
-#         snr_db = torch.tensor([self.factors[self.aug_index]])
-#         self.noise = torch.from_numpy(self.get_noise())
-#         self.noise = torch.unsqueeze(self.noise, dim=0)
-#         self.resampled_audio = torch.from_numpy(self.resample(audio))
-#         noisy_audio = self.add_noise(self.resampled_audio, self.noise, snr_db)
-
-#         return noisy_audio.detach().numpy()
-
-
-# @dataclass
-# class EmbeddedHomeNoise(AddHomeNoise):
-#     """Add a home background noise (from QUTNOise dataset) to the data sample with a known snr_db
-#     (signal to noise ratio).
-
-#     The difference with AddHomeNoise is that a leading (/and trainling) noise will be added to the augmented sample.
-#     Parameters:
-#         noise_length (int): the length of noise (in seconds) that will be added to the sample
-#         two_sided (bool): if True the augmented signal will be encompassed between leading and trailing noises
-#     Args:
-#         audio (np.ndarray): data sample
-#     Returns:
-#         np.ndarray: data sample with added noise at the begining
-#     """
-
-#     noise_length: int = None
-#     two_sided: bool = False
-
-#     def __post_init__(self):
-#         super().__post_init__()
-
-#         if self.noise_length is None:
-#             raise ValueError("noise length is not specified")
-#         elif self.noise_length > self.sample_length:
-#             raise ValueError(
-#                 "in the current implementation length of noise can't exceed sample length"
-#             )
-
-#     def __call__(self, audio: np.ndarray):
-#         if not self.caching:
-#             self.aug_index = random.choice(range(0, len(self.factors)))
-#         snr_db = torch.tensor([self.factors[self.aug_index]])
-
-#         self.noise = torch.from_numpy(self.get_noise())
-#         self.noise = torch.unsqueeze(self.noise, dim=0)
-#         self.resampled_audio = torch.from_numpy(self.resample(audio))
-#         noisy_audio = (
-#             self.add_noise(self.resampled_audio, self.noise, snr_db).detach().numpy()
-#         )
-
-#         initial_noise = self.scaled_noise[
-#             :, 0 : int(self.target_sr * self.noise_length)
-#         ]
-#         if self.two_sided:
-#             noise_then_audio = np.concatenate(
-#                 (initial_noise, noisy_audio, initial_noise), axis=1
-#             )
-#         else:
-#             noise_then_audio = np.concatenate((initial_noise, noisy_audio), axis=1)
-
-#         return noise_then_audio
-
-
 @dataclass
 class RIR:
     """Convolves a RIR (room impluse response) to the data sample.

0 commit comments
