-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpraat_simplifier.py
221 lines (164 loc) · 8.38 KB
/
praat_simplifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import os
import argparse
try:
import parselmouth
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
except ImportError as e:
print(f'{e}. Install the required dependencies. Try: <pip install -r requirements.txt>')
class PraatSimplifier():
def __init__(self, in_dir: str, out_dir: str):
self.in_dir = in_dir
self.out_dir = out_dir
def save_to_mono(self):
"""
Convert all files to mono if necessary.
"""
for file in os.listdir(self.in_dir):
if file.endswith('.wav'):
sound = parselmouth.Sound(os.path.join(self.in_dir, file))
mono_signal = sound.convert_to_mono()
output_path = os.path.join(self.out_dir, file)
mono_signal.save(output_path, format='WAV')
print(f'Mono sound saved to {output_path}')
print('Done.')
def get_formants(self, n_timestamps: int = 10, n_formants: int = 3) -> list:
"""
Extract formants from audio files in the specified directory.
Parameters:
n_timestamps (int): The number of timestamps to sample per file.
n_formants (int): The number of formants to extract.
Returns:
list: A list of dictionaries containing formant data.
"""
self.n_timestamps = n_timestamps
self.n_formants = n_formants
self.f_data = []
for file in os.listdir(self.in_dir):
if file.endswith('.wav'):
print(f'Analyzing {file}...')
sound = parselmouth.Sound(os.path.join(self.in_dir, file))
sound = sound.convert_to_mono()
dur = sound.get_total_duration()
time_points = np.linspace(0, dur, self.n_timestamps)
formant = sound.to_formant_burg(max_number_of_formants=self.n_formants)
for time in time_points:
formant_data = {'sound': file[:-4], 'time': time}
for i in range(1, self.n_formants + 1):
formant_value = formant.get_value_at_time(i, time)
formant_data[f'F{i}'] = round(formant_value, 3) if formant_value is not None else None
self.f_data.append(formant_data)
return self.f_data
def export_formants(self):
"""
Export the extracted formant data to a CSV file.
"""
if not self.f_data:
print("No formant data to export. Run get_formants() first.")
return
df = pd.DataFrame(self.f_data)
file_path = f'{self.out_dir}/formants.csv' if self.out_dir else 'formants.csv'
df.to_csv(file_path, index=False)
print(f'File saved to {file_path}')
def plot_sound_amplitude(self, sound_dir: str, start_time: float = None, end_time: float = None, save_amplitude_plot: bool = False):
"""
Plots the sound amplitude of a sound file.
Parameters:
- sound_dir: directory to the selected sound
- start_time: start at time x (float)
- end_time: trim at time y (float)
Returns: An amplitude plot of the sound.
"""
assert sound_dir.endswith('.wav'), 'plot_sound_amplitude() can only process one .wav sound file.'
try:
sound = parselmouth.Sound(sound_dir)
if start_time is not None and end_time is not None:
extracted_sound = sound.extract_part(from_time=start_time, to_time=end_time, preserve_times=True)
else:
extracted_sound = sound
extracted_sound = extracted_sound.convert_to_mono()
plt.figure(figsize=(10, 5))
plt.plot(extracted_sound.xs(), extracted_sound.values[0], linewidth=0.3, color='rebeccapurple')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.title('Sound Wave')
plt.grid(True, alpha=0.5)
if save_amplitude_plot:
assert self.out_dir, 'Specify --out_dir to save the plot.'
if not os.path.exists(self.out_dir):
os.makedirs(self.out_dir)
plt.savefig(os.path.join(self.out_dir, 'amplitude_plot.png'), dpi=1200)
plt.show()
except Exception as e:
print(f'An error occurred: {e}')
def plot_formants(self, save_formant_plot: bool = False, dpi: int = 1200):
"""
Plot the extracted F-values by sound (maximum of 9 for clarity.)
"""
if not self.f_data:
print('No formant data. Run get_formants() first.')
return
unique_sounds = min(len(set(d['sound'] for d in self.f_data)), 9)
nrows = np.ceil(unique_sounds / 3).astype(int)
fig, axs = plt.subplots(nrows, 3, figsize=(10, nrows * 3))
fig.tight_layout(pad=3)
axs = axs.flatten()
for ax, (sound, df) in zip(axs, pd.DataFrame(self.f_data).groupby('sound')):
for i in range(1, self.n_formants + 1):
ax.plot(df['time'], df[f'F{i}'], label=f'F{i}')
ax.set_title(sound, fontsize=10)
ax.set_xlabel('Time (s)', fontsize=10)
ax.set_ylabel('Frequency (Hz)', fontsize=10)
ax.tick_params(axis='x', labelsize=8)
ax.tick_params(axis='y', labelsize=8)
ax.legend(fontsize=8)
for i in range(unique_sounds, len(axs)):
axs[i].set_visible(False)
if save_formant_plot:
if not os.path.exists(self.out_dir):
os.makedirs(self.out_dir)
file_path = os.path.join(self.out_dir, 'formant_plots.png')
try:
plt.savefig(file_path, dpi=dpi)
print(f"Plot saved successfully to {file_path}")
except Exception as e:
print(f"Failed to save the plot: {e}")
plt.show()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Simplified code to extract formant values in a .csv file.')
"""
Args for formant analysis
"""
parser.add_argument('--in_dir', type=str, required=False, help='Directory to your sound files.')
parser.add_argument('--out_dir', type=str, required=False, default='./', help='Output directory for the .csv file.')
parser.add_argument('--get_formants', type=bool, required=False, default=False, help='Analyze formants.')
parser.add_argument('--n_timestamps', type=int, required=False, default=10, help='Number of timestamps to extract the formants from.')
parser.add_argument('--n_formants', type=int, required=False, default=3, help='Number of formants to extract.')
parser.add_argument('--export_formants_file', type=bool, required=False, default=False, help='Export .csv with formants.')
parser.add_argument('--save_formant_plot', type=bool, required=False, default=False, help='True = save plot; False = do not save')
parser.add_argument('--dpi', type=int, required=False, default=300, help='Quality of plot.')
"""
Args for audio transformantion
"""
parser.add_argument('--save_to_mono', type=bool, required=False, default=False, help='Save to mono all the sounds in the in_dir.')
"""
Args for sound amplitude ploting.
"""
parser.add_argument('--plot_sound_amplitude', type=bool, required=False, default=False, help='Plot a sound amplitude.')
parser.add_argument('--sound_dir', type=str, required=False, help='Path to the sound file for plotting amplitude.')
parser.add_argument('--start_time', type=float, required=False, help='Start time to plot.')
parser.add_argument('--end_time', type=float, required=False, help='End time to plot.')
parser.add_argument('--save_amplitude_plot', type=bool, required=False, default=False, help='Save amplitude plot.')
args = parser.parse_args()
simplifier = PraatSimplifier(args.in_dir, args.out_dir)
if args.get_formants:
formants = simplifier.get_formants(n_formants=args.n_formants, n_timestamps=args.n_timestamps)
if args.export_formants_file:
simplifier.export_formants()
if args.save_formant_plot:
simplifier.plot_formants(save_formant_plot=True, dpi=args.dpi)
if args.save_to_mono:
simplifier.save_to_mono()
if args.plot_sound_amplitude:
simplifier.plot_sound_amplitude(args.sound_dir, args.start_time, args.end_time, args.save_amplitude_plot)