@@ -84,17 +84,19 @@ def apply(self, samples, sample_rate):
84
84
class TimeMask(BasicTransform):
    """Mask some time band on the spectrogram. Inspired by https://arxiv.org/pdf/1904.08779.pdf

    A randomly sized, randomly positioned span along the first axis of the
    input is silenced. With ``fade=True`` the edges of that span are ramped
    linearly instead of being cut abruptly.
    """

    def __init__(self, min_band_part=0.0, max_band_part=0.5, fade=False, p=0.5):
        """
        :param min_band_part: Minimum length of the silent part as a fraction of the
            total sound length. Float.
        :param max_band_part: Maximum length of the silent part as a fraction of the
            total sound length. Float.
        :param fade: Bool, Add linear fade in and fade out of the silent part.
        :param p: Forwarded unchanged to the ``BasicTransform`` constructor
            (presumably the probability of applying the transform -- confirm
            against the base class).
        """
        super().__init__(p)
        self.min_band_part = min_band_part
        self.max_band_part = max_band_part
        self.fade = fade

    def apply(self, samples, sample_rate):
        """Return a copy of ``samples`` with one random time span masked.

        :param samples: Array indexed by time along its first axis. The input
            itself is not modified; a copy is made and returned.
            NOTE(review): the in-place multiply below assumes a floating-point
            1-D array -- confirm against callers.
        :param sample_rate: Samples per second; only used to size the fade
            ramps (at most 10 ms each).
        :return: The masked copy of ``samples``.
        """
        new_samples = samples.copy()
        total = new_samples.shape[0]

        # Length of the masked span, then its start offset chosen so that the
        # span always fits inside the signal.
        _t = random.randint(
            int(total * self.min_band_part),
            int(total * self.max_band_part),
        )
        _t0 = random.randint(0, total - _t)

        mask = np.zeros(_t)
        if self.fade:
            # Each ramp is 10 ms long, or 10% of the span when that is shorter.
            fade_length = min(int(sample_rate * 0.01), int(_t * 0.1))
            # Spans shorter than 10 samples give fade_length == 0. Without this
            # guard ``mask[-0:]`` would address the *whole* mask and assigning
            # an empty linspace to it raises a broadcast ValueError, so short
            # spans simply fall back to a hard mask.
            if fade_length > 0:
                # Signal fades out into the silence ...
                mask[:fade_length] = np.linspace(1, 0, num=fade_length)
                # ... and fades back in at the end of it.
                mask[-fade_length:] = np.linspace(0, 1, num=fade_length)

        new_samples[_t0 : _t0 + _t] *= mask
        return new_samples
142
115
143
116
0 commit comments