-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
executable file
·123 lines (104 loc) · 4.57 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 4 18:11:08 2020
@author: vineeth, ashwin, shashank
"""
import sys
import warnings
from os import path, listdir

import cv2
import imageio
import librosa
import numpy as np
from scipy.io.wavfile import write
from tensorflow.keras.models import load_model
from tensorflow_addons.layers import InstanceNormalization
# Directory holding the trained generator models (.h5 checkpoints).
path_to_models = "./models"
# Directory of input audio files to be converted.
path_to_audio = "./test_audio/SeenAudio"
# Directory where generated spectrogram images and audio are written.
path_to_results = "./results/GoodAudio2"
def get_spectrogram(name_of_audio):
    """Load an audio file and return its spectrogram as a normalized 260x260 image.

    Side effects: writes an amplified copy of the audio and a JPEG of the
    spectrogram into ``path_to_results``.

    Parameters:
        name_of_audio: file name (with extension) inside ``path_to_audio``.

    Returns:
        np.ndarray of shape (260, 260), values scaled to [-1, 1].
    """
    y, sr = librosa.load(path.join(path_to_audio, name_of_audio), sr=16000)
    # Save an amplified copy of the source audio next to the results.
    # NOTE(review): y*3 can clip samples outside [-1, 1] — presumably an
    # intentional loudness boost; confirm.
    write(path.join(path_to_results, name_of_audio), 16000, y * 3)
    max_len = 257
    # Magnitude STFT: 512-point FFT -> 257 frequency bins per frame.
    D = np.abs(librosa.stft(y, n_fft=512, hop_length=256))
    x = librosa.amplitude_to_db(D, ref=np.max)
    # Take a window of up to max_len frames centered in the spectrogram.
    # max(0, ...) guards short clips with fewer than max_len frames.
    y_len = x.shape[1]
    start = max(0, y_len // 2 - max_len // 2)
    x = x[:, start:start + max_len]
    # splitext instead of a fixed [:-4] slice so non-4-char extensions work too.
    base = path.splitext(name_of_audio)[0]
    jpg_path = path.join(path_to_results, base + ".jpg")
    imageio.imwrite(jpg_path, x)
    # Round-trip through JPEG quantizes the dB values to uint8 [0, 255].
    im = cv2.imread(jpg_path, -1)
    # Pad into a 260x260 canvas (copy the actual image extent so short
    # clips do not cause a shape-mismatch assignment error).
    im_f = np.zeros((260, 260))
    rows = min(im.shape[0], 257)
    cols = min(im.shape[1], 257)
    im_f[:rows, :cols] = im[:rows, :cols]
    # Scale uint8 [0, 255] to [-1, 1] for the generator input.
    im_f = (im_f - 127.5) / 127.5
    return im_f
def griffinlim(S, n_iter=32, hop_length=None, win_length=None, window='hann',
               center=True, dtype=np.float32, length=None, pad_mode='reflect',
               momentum=0.99, init='random', random_state=None):
    """Reconstruct a time-domain signal from a magnitude spectrogram via the
    (accelerated) Griffin-Lim algorithm, iteratively estimating the phase.

    Adapted from ``librosa.griffinlim``.

    Parameters:
        S: magnitude spectrogram, shape (1 + n_fft//2, t).
        n_iter: number of phase-estimation iterations.
        hop_length, win_length, window, center, dtype, length, pad_mode:
            passed through to ``librosa.stft`` / ``librosa.istft``.
        momentum: acceleration factor; 0 gives classic Griffin-Lim.
        init: 'random' for random initial phases, None for all-ones.
        random_state: None, int seed, or np.random.RandomState instance.

    Returns:
        np.ndarray: the reconstructed time-domain signal.

    Raises:
        ValueError: if ``momentum`` < 0, ``init`` is invalid, or
            ``random_state`` is of an unsupported type.
    """
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, int):
        rng = np.random.RandomState(seed=random_state)
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        # Previously `rng` was simply left unbound here, producing a
        # confusing UnboundLocalError later; fail fast instead.
        raise ValueError('random_state={} must be None, an int, or an '
                         'np.random.RandomState'.format(random_state))
    if momentum > 1:
        warnings.warn('Griffin-Lim with momentum={} > 1 can be unstable. '
                      'Proceed with caution!'.format(momentum))
    elif momentum < 0:
        # Was `ParameterError`, an undefined name (NameError at runtime);
        # raise the intended validation error instead.
        raise ValueError('griffinlim() called with momentum={} < 0'.format(momentum))
    # Infer n_fft from the spectrogram shape
    n_fft = 2 * (S.shape[0] - 1)
    # using complex64 will keep the result to minimal necessary precision
    angles = np.empty(S.shape, dtype=np.complex64)
    if init == 'random':
        # randomly initialize the phase
        angles[:] = np.exp(2j * np.pi * rng.rand(*S.shape))
    elif init is None:
        # Initialize an all ones complex matrix
        angles[:] = 1.0
    else:
        # Was `ParameterError` (undefined name) — see above.
        raise ValueError("init={} must either None or 'random'".format(init))
    # And initialize the previous iterate to 0
    rebuilt = 0.
    for _ in range(n_iter):
        # Store the previous iterate
        tprev = rebuilt
        # Invert with our current estimate of the phases
        inverse = librosa.istft(S * angles, hop_length=hop_length, win_length=win_length,
                                window=window, center=center, dtype=dtype, length=length)
        # Rebuild the spectrogram
        rebuilt = librosa.stft(inverse, n_fft=n_fft, hop_length=hop_length,
                               win_length=win_length, window=window, center=center,
                               pad_mode=pad_mode)
        # Update our phase estimates (momentum term accelerates convergence)
        angles[:] = rebuilt - (momentum / (1 + momentum)) * tprev
        angles[:] /= np.abs(angles) + 1e-16
    # Return the final phase estimates
    return librosa.istft(S * angles, hop_length=hop_length, win_length=win_length,
                         window=window, center=center, dtype=dtype, length=length)
def spec_to_audio(X_out, name_gen):
    """Convert a generated 260x260 spectrogram back into an audio file.

    Writes ``<name_gen>.jpg`` (the raw generated image) and ``<name_gen>.wav``
    (the reconstructed audio, amplified 3x) into ``path_to_results``.
    """
    jpg_file = path.join(path_to_results, name_gen + ".jpg")
    imageio.imwrite(jpg_file, X_out.reshape(260, 260))
    # Re-read through JPEG and crop back to the valid 257x257 region.
    spec = cv2.imread(jpg_file, -1)[:257, :257]
    # Map uint8 pixel values [0, 255] back to a dB range of [-80, 0].
    spec = spec * 80.0 / 255.0 - 80.0
    magnitude = librosa.db_to_amplitude(spec)
    # Recover phase (and hence a waveform) with Griffin-Lim.
    waveform = griffinlim(magnitude, hop_length=256)
    write(path.join(path_to_results, name_gen + ".wav"), 16000, waveform * 3)
def tester(name_of_audio, name_of_model):
    """Run one audio file through a saved generator model and save the result.

    Parameters:
        name_of_audio: audio file name inside ``path_to_audio``.
        name_of_model: generator checkpoint name inside ``path_to_models``,
            following the pattern ``g_model_<src>2<dst>[_<step>].h5``.
    """
    # Extract the target emotion <dst> from the model name. Strip the
    # extension first: without this, a name with no step suffix (e.g. the
    # default "g_model_calm2anger.h5") yielded "anger.h5" and polluted the
    # generated file names.
    model_stem = path.splitext(name_of_model)[0]
    domain2 = model_stem.split("_")[2].split("2")[1]
    A_real = get_spectrogram(name_of_audio)
    # Add batch and channel dimensions expected by the generator.
    A_real = np.reshape(A_real, (1, 260, 260, 1))
    # InstanceNormalization is a custom layer and must be registered for load_model.
    cust = {'InstanceNormalization': InstanceNormalization}
    model_AtoB = load_model(path.join(path_to_models, name_of_model), cust)
    B_generated = model_AtoB.predict(A_real)
    name_gen = path.splitext(name_of_audio)[0] + "_" + domain2 + "_generated"
    spec_to_audio(B_generated[0], name_gen)
if __name__ == "__main__":
    # Generator checkpoint to evaluate. Other trained emotion pairs:
    #   g_model_calm2surprised_030000.h5, g_model_calm2disgust_030000.h5,
    #   g_model_calm2sad_030000.h5, g_model_calm2happy_030000.h5,
    #   g_model_calm2fearful_039000.h5
    name_of_model = "g_model_calm2anger.h5"
    # Convert every audio file in the test directory with the chosen model.
    for audio_file in listdir(path_to_audio):
        tester(audio_file, name_of_model)