marl · iranroman · Aug 24, 2023 · Aug 24, 2023 · Aug 25, 2023 · Aug 25, 2023
diff --git a/audio_synthesizer.py b/audio_synthesizer.py
@@ -32,6 +32,10 @@ def __init__(
         self._apply_event_gains = db_config._apply_class_gains
         self._db_name = params['db_name']
         self._fs = params['fs']
+        self._pitch_shift = params['random_pitch_shift'] # flag: randomly pitch-shift event signals in synthesize_mixtures
+        if self._pitch_shift:
+            self._bins_per_octave = params['bins_per_octave'] # these two keys are only read (and the attributes only set) when the flag is on
+            self._n_bins_up_down = params['n_bins_up_down'] # bound of the random shift: steps are drawn from -n_bins_up_down..+n_bins_up_down
         if self._apply_event_gains:
             self._class_gains = db_config._class_gains
 
@@ -97,8 +101,9 @@ def synthesize_mixtures(self):
                             eventsig, fs_db = librosa.load(filename, sr=self._fs) # here we need librosa since we are loading .mp3 
                         else:
                             raise Exception(f"Unknown event database: {self._db_name}")
-
-
+                        n_steps = np.random.randint(-self._n_bins_up_down, self._n_bins_up_down + 1) if self._pitch_shift else 0 # uniform integer in [-n, n]; 0 when pitch shifting is disabled
+                        if n_steps: # skip the call when the draw is 0: there is nothing to shift
+                            eventsig = librosa.effects.pitch_shift(eventsig, sr=fs_db, n_steps=n_steps, bins_per_octave=self._bins_per_octave)
 
 
                         if len(np.shape(eventsig)) > 1:

diff --git a/generation_parameters.py b/generation_parameters.py
@@ -37,6 +37,9 @@ def get_params(argv='1'):
         mixture_duration = 60., #in seconds
         event_time_per_layer = 40., #in seconds (should be less than mixture_duration)
         audio_format = 'both', # 'foa' (First Order Ambisonics) or 'mic' (four microphones) or 'both'
+        random_pitch_shift = True, # if True, each loaded event signal is pitch-shifted by a random number of steps
+        bins_per_octave = 12, # number of steps per octave, passed to librosa.effects.pitch_shift
+        n_bins_up_down = 6, # largest shift in steps; the shift is drawn uniformly from [-n_bins_up_down, n_bins_up_down]
             )
 
 
@@ -47,7 +50,7 @@ def get_params(argv='1'):
     elif argv == '2': ###### FSD50k DATA
         params['db_name'] = 'fsd50k'
         params['db_path']= '/home/iran/datasets/FSD50K'
-        params['mixturepath'] = '/datasets/SELD-dataset-sofa'
+        params['mixturepath'] = '/datasets/SELD-dataset-pitch'
         params['active_classes'] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
         params['max_polyphony'] = 2