-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_speaker_visualization.py
96 lines (77 loc) · 3.72 KB
/
run_speaker_visualization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
from Utility.EvaluationScripts.SpeakerVisualization import Visualizer
from run_text_to_file_reader import read_texts_as_ensemble
def visualize_random_speakers(generate=False):
    """
    Visualize the speaker embeddings of the audios in audios/random_speakers.

    Every file found in that directory is passed to the Visualizer under its
    own label (the file name), so each audio is treated as a separate speaker.

    Args:
        generate: If True, make sure the directory exists and, when it holds
            no files yet, fill it by calling read_texts_as_ensemble with
            amount=100 before visualizing. Existing files are never
            regenerated.
    """
    if generate:
        # first we generate a bunch of audios with random speaker embeddings
        # The directory has to be created BEFORE it is listed: os.listdir raises
        # FileNotFoundError on a missing directory, so checking for emptiness
        # first would crash on a fresh checkout and never reach makedirs.
        os.makedirs("audios/random_speakers/", exist_ok=True)
        if not os.listdir("audios/random_speakers/"):
            read_texts_as_ensemble(model_id="Libri",
                                   sentence="Hi, I am a completely random speaker that probably doesn't exist!",
                                   filename="audios/random_speakers/libri",
                                   amount=100)

    # then we visualize those audios
    vs = Visualizer()
    # directory entries are unique, so every file gets its own single-element list
    ltf = {audio_file: [f"audios/random_speakers/{audio_file}"]
           for audio_file in os.listdir("audios/random_speakers")}
    vs.visualize_speaker_embeddings(label_to_filepaths=ltf, title_of_plot="Embeddings of TTS with random Condition")
def visualize_libritts():
    """
    Visualize the speaker embeddings of the audios stored under audios/LibriTTS.

    The directory is expected to be nested as speaker/book/audio_file. All
    audio files below a speaker directory are grouped under that speaker's
    directory name, which serves as the label in the plot.
    """
    visualizer = Visualizer()
    corpus_root = "audios/LibriTTS"
    files_per_speaker = {}
    for speaker_id in os.listdir(corpus_root):
        speaker_dir = f"{corpus_root}/{speaker_id}"
        # flatten the book level: every file of every book belongs to the speaker
        files_per_speaker[speaker_id] = [
            f"{speaker_dir}/{book_id}/{clip_name}"
            for book_id in os.listdir(speaker_dir)
            for clip_name in os.listdir(f"{speaker_dir}/{book_id}")
        ]
    visualizer.visualize_speaker_embeddings(label_to_filepaths=files_per_speaker,
                                            title_of_plot="Embeddings of a Subset of LibriTTS")
def visualize_adept_experiment():
    """
    Visualize the speaker embeddings of the samples under audios/adept_plot.

    Each subdirectory is one experiment condition: "human" maps to "Human",
    "same_voice_diff_style" to "Unconditioned" and anything else to "Cloned".
    The speaker is read from the file name: the part between the first and
    second underscore, cut at the first dot. "ad00" maps to "Female", "ad01"
    to "Male" and anything else to "Other Female".

    Only samples that are neither "Human" nor "Other Female" are plotted,
    grouped under the label "<speaker> - <condition>".
    """
    speaker_names = {"ad00": "Female", "ad01": "Male"}
    condition_names = {"human": "Human", "same_voice_diff_style": "Unconditioned"}

    visualizer = Visualizer()
    files_by_label = {}
    for experiment in os.listdir("audios/adept_plot"):
        condition = condition_names.get(experiment, "Cloned")
        for sample_name in os.listdir(f"audios/adept_plot/{experiment}"):
            # the speaker code is parsed for every sample, including the ones
            # filtered out below, so a file name without "_" still raises
            speaker_code = sample_name.split("_")[1].split(".")[0]
            speaker = speaker_names.get(speaker_code, "Other Female")
            if condition == "Human" or speaker == "Other Female":
                continue
            label = f"{speaker} - {condition}"
            files_by_label.setdefault(label, []).append(f"audios/adept_plot/{experiment}/{sample_name}")
    visualizer.visualize_speaker_embeddings(label_to_filepaths=files_by_label,
                                            title_of_plot="Speakers with and without Cloning",
                                            include_pca=False,
                                            colors=["limegreen", "darkgreen", "dodgerblue", "darkblue"])
def visualize_speakers_languages_crossover():
    """
    Visualize the speaker embeddings of the audios in audios/speakers_for_plotting.

    Files are grouped by the part of their name before the first underscore.
    That prefix is capitalized and suffixed with " Speaker" to form the label
    shown in the plot.
    """
    files_by_label = {}
    visualizer = Visualizer()
    for file_name in os.listdir("audios/speakers_for_plotting"):
        prefix = file_name.split("_")[0]
        plot_label = prefix.capitalize() + " Speaker"
        files_by_label.setdefault(plot_label, []).append(f"audios/speakers_for_plotting/{file_name}")
    visualizer.visualize_speaker_embeddings(label_to_filepaths=files_by_label,
                                            title_of_plot="Speakers Across Languages",
                                            include_pca=False)
def calculate_spk_sims_multiling():
    """
    Print the speaker similarity between each reference and its matching samples.

    The samples in audios/speakers_for_plotting are grouped by the part of
    their file name before the first underscore. For every file in
    audios/multilanguage_references, the part of its name before the first
    dot selects the group to compare against. The label is printed first,
    followed by the result of Visualizer.calculate_spk_sim.

    Raises:
        KeyError: if a reference has no sample group with a matching label.
    """
    samples_by_speaker = {}
    visualizer = Visualizer()
    for sample_name in os.listdir("audios/speakers_for_plotting"):
        speaker_key = sample_name.split("_")[0]
        samples_by_speaker.setdefault(speaker_key, []).append(f"audios/speakers_for_plotting/{sample_name}")

    for reference_name in os.listdir("audios/multilanguage_references"):
        speaker_key = reference_name.split(".")[0]
        print(speaker_key)
        reference_path = f"audios/multilanguage_references/{reference_name}"
        similarity = visualizer.calculate_spk_sim(reference_path, samples_by_speaker[speaker_key])
        print(similarity)
# Script entry point: only the multilingual speaker similarity calculation is run.
if __name__ == "__main__":
    calculate_spk_sims_multiling()