diff --git a/ml/data_processing/audio_augment.py b/ml/data_processing/audio_augment.py
index cd9d3fb..4231c75 100644
--- a/ml/data_processing/audio_augment.py
+++ b/ml/data_processing/audio_augment.py
@@ -95,6 +95,9 @@ def augment_all_audio(
             if "TM" in augmentations_to_perform:
                 spectrogram = self.time_mask(spectrogram, time_mask)
 
+            # TODO: Uncomment this and run
+            # spectrogram = processor.augment_hue(spectrogram)
+
             # Save image file
             save_path = os.path.join(save_directory, filepath[:-4] + "_aug.png")
             print(save_path)
diff --git a/ml/data_processing/spectrogram_processor.py b/ml/data_processing/spectrogram_processor.py
index 8446ed8..785b24e 100644
--- a/ml/data_processing/spectrogram_processor.py
+++ b/ml/data_processing/spectrogram_processor.py
@@ -4,6 +4,7 @@ This module provides the `SpectrogramProcessor` class for converting audio files
 into spectrograms, normalizing them, and extracting features.
 """
 
+import random
 import numpy as np
 import librosa
 
@@ -13,6 +14,8 @@ from pydub import AudioSegment
 import multiprocessing
 from concurrent.futures import ProcessPoolExecutor
 
+import cv2
+
 
 class SpectrogramProcessor(ImageProcessor):
 
@@ -219,6 +222,42 @@ def apply_stft(self, audio_clip: np.ndarray) -> np.ndarray:
 
         return log_spectro
 
+    def augment_hue(self, spectrogram: np.ndarray) -> np.ndarray:
+        """Return an RGB spectrogram image with a randomly shifted hue.
+
+        The shift is drawn uniformly from [-0.2, 0.2], where 1.0 would be a
+        full turn of the hue circle.
+        """
+        hue_shift = random.uniform(-0.2, 0.2)
+        return self.change_hue(spectrogram, hue_shift)
+
+    def change_hue(self, image: np.ndarray, hue_shift: float) -> np.ndarray:
+        """Rotate the hue of an RGB image by a fraction of the hue circle.
+
+        Args:
+            image: 3-channel RGB image in a dtype ``cv2.cvtColor`` accepts
+                for HSV conversion (``uint8`` or ``float32``).
+            hue_shift: Rotation as a fraction of the hue circle; the result
+                wraps around, so negative values are allowed.
+
+        Returns:
+            A new RGB image; ``image`` itself is not modified.
+        """
+        # OpenCV stores hue in [0, 180) for 8-bit images but in [0, 360) for
+        # float32 images, so the wrap-around period depends on the dtype.
+        hue_period = 180.0 if image.dtype == np.uint8 else 360.0
+
+        hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+
+        # Shift in float32 so negative offsets wrap instead of underflowing
+        # uint8; the assignment casts back to the HSV array's own dtype.
+        hue = hsv_image[..., 0].astype(np.float32)
+        hsv_image[..., 0] = (hue + hue_shift * hue_period) % hue_period
+
+        # Keep the HSV array's dtype: forcing uint8 here would truncate the
+        # [0, 1] saturation/value channels of a float32 image.
+        return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
+
     def plot_spectrogram(self, filename, spectrogram) -> None:
         """Plots the spectrogram using 
Matplotlib