diff --git a/audio_synthesizer.py b/audio_synthesizer.py index 0e600d3..c979fbb 100644 --- a/audio_synthesizer.py +++ b/audio_synthesizer.py @@ -32,6 +32,10 @@ def __init__( self._apply_event_gains = db_config._apply_class_gains self._db_name = params['db_name'] self._fs = params['fs'] + self._pitch_shift = params['random_pitch_shift'] + if self._pitch_shift: + self._bins_per_octave = params['bins_per_octave'] + self._n_bins_up_down = params['n_bins_up_down'] if self._apply_event_gains: self._class_gains = db_config._class_gains @@ -97,8 +101,9 @@ def synthesize_mixtures(self): eventsig, fs_db = librosa.load(filename, sr=self._fs) # here we need librosa since we are loading .mp3 else: raise Exception(f"Unknown event database: {self._db_name}") - - + if self._pitch_shift: + n_steps = np.random.choice(range(-self._n_bins_up_down,self._n_bins_up_down+1)) + eventsig = librosa.effects.pitch_shift(eventsig, sr=fs_db, n_steps=n_steps, bins_per_octave=self._bins_per_octave) if len(np.shape(eventsig)) > 1: diff --git a/generation_parameters.py b/generation_parameters.py index 8a6f9f7..0c8732a 100644 --- a/generation_parameters.py +++ b/generation_parameters.py @@ -37,6 +37,9 @@ def get_params(argv='1'): mixture_duration = 60., #in seconds event_time_per_layer = 40., #in seconds (should be less than mixture_duration) audio_format = 'both', # 'foa' (First Order Ambisonics) or 'mic' (four microphones) or 'both' + random_pitch_shift = True, + bins_per_octave = 12, + n_bins_up_down = 6, ) @@ -47,7 +50,7 @@ def get_params(argv='1'): elif argv == '2': ###### FSD50k DATA params['db_name'] = 'fsd50k' params['db_path']= '/home/iran/datasets/FSD50K' - params['mixturepath'] = '/datasets/SELD-dataset-sofa' + params['mixturepath'] = '/datasets/SELD-dataset-pitch' params['active_classes'] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] params['max_polyphony'] = 2