# synthesize_midi_file.py
import gin
import argparse
import tensorflow as tf
from absl import logging
from soundfile import write
from ddsp.training import trainers, train_util
from ddsp.training.models import get_model
from ddsp_piano.data_pipeline import get_dummy_data
from ddsp_piano.utils.io_utils import load_midi_as_conditioning, normalize_audio
def process_args(argv=None):
    """Parse the command-line arguments of the MIDI synthesis script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from `sys.argv[1:]`.

    Returns:
        An `argparse.Namespace` with the attributes `config`, `ckpt`,
        `piano_type`, `warm_up`, `duration`, `normalize`, `unreverbed`,
        `midi_file` and `out_file`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', type=str,
        default='ddsp_piano/configs/maestro-v2.gin',
        help="A .gin model config.")
    parser.add_argument(
        '--ckpt', type=str,
        default='ddsp_piano/model_weights/v2/',
        help="Model checkpoint to load.")
    # Multi-line help texts are built from adjacent string literals (with an
    # explicit separating space) rather than a backslash continuation inside
    # the literal, so the rendered text does not depend on source indentation.
    parser.add_argument(
        '--piano_type', type=int, default=9,
        help="Piano model (from 0 to 9). "
             "(default: %(default)s)")
    parser.add_argument(
        '-wu', '--warm_up', type=float, default=0.5,
        help="Warm-up duration (in s, default: %(default)s)")
    parser.add_argument(
        '-d', '--duration', type=float, default=None,
        help="Maximum duration of synthesized audio. "
             "(default: %(default)s)")
    parser.add_argument(
        '-n', '--normalize', type=float, default=None,
        help="Normalize audio to this amount of dBFS. "
             "(default: %(default)s)")
    parser.add_argument(
        '-u', '--unreverbed', action='store_true',
        help="Also generates dry piano audio, without reverb.")
    parser.add_argument(
        'midi_file', type=str,
        help="Piano MIDI file to synthesize.")
    parser.add_argument(
        'out_file', type=str,
        help="Save audio as wav file.")
    return parser.parse_args(argv)
def _save_audio(path, signal, sample_rate, warm_up, normalize):
    """Write `signal`, minus its warm-up segment, as an audio file at `path`.

    Args:
        path: Destination file path.
        signal: Batched audio tensor; only the first batch item is saved.
        sample_rate: Sample rate passed to the writer and used to convert
            `warm_up` into a number of samples.
        warm_up: Duration (in s) trimmed from the start of the signal.
        normalize: Target level (in dBFS) for `normalize_audio`, or None to
            leave the written file untouched.
    """
    n_warm_up = int(warm_up * sample_rate)
    write(path,
          data=signal[0, n_warm_up:].numpy(),
          samplerate=sample_rate)
    # Compare against None explicitly: a target of 0 dBFS is a valid request
    # but is falsy, so a plain truthiness test would silently skip it.
    if normalize is not None:
        normalize_audio(path, normalize)


def main(args):
    """Synthesize a MIDI file with the piano model and save it as audio.

    Args:
        args: Parsed command-line namespace providing `midi_file`,
            `duration`, `warm_up`, `piano_type`, `config`, `ckpt`,
            `out_file`, `normalize` and `unreverbed`.
    """
    # Load MIDI data
    logging.info("Loading midi file...")
    inputs = load_midi_as_conditioning(args.midi_file,
                                       duration=args.duration,
                                       warm_up_duration=args.warm_up)
    # Add piano model conditioning
    inputs['piano_model'] = tf.convert_to_tensor([[args.piano_type]])
    logging.info(
        f"Midi file loaded (with duration {inputs['duration'] - args.warm_up} s)."
        "\nNow building the piano synthesizer..."
    )

    # Parse and override gin-config
    gin.parse_config_file(args.config)
    gin.bind_parameter('%inference', True)
    gin.bind_parameter('%duration', inputs['duration'])

    strategy = train_util.get_strategy()
    # NOTE(review): the source this was restored from had lost its
    # indentation, so the exact extent of the strategy scope is an
    # assumption (build, restore and forward pass) - confirm.
    with strategy.scope():
        # Model construction
        model = get_model()
        trainer = trainers.Trainer(model=model,
                                   strategy=strategy)
        trainer.build(get_dummy_data(batch_size=1,
                                     duration=1.0,
                                     sample_rate=model.sample_rate))

        # Restore model weights
        logging.info("Model built, now retrieving model weights...")
        trainer.restore(args.ckpt)

        # Forward pass
        logging.info(
            f"Model weights loaded from {args.ckpt} "
            "\nNow synthesizing audio (this could take some time)..."
        )
        outs = model(inputs)

    # Save final audio
    _save_audio(args.out_file,
                outs['audio_synth'],
                model.sample_rate,
                args.warm_up,
                args.normalize)

    # Save dry audio (optional)
    if args.unreverbed:
        _save_audio(args.out_file + "_unreverbed.wav",
                    outs['add']['signal'],
                    model.sample_rate,
                    args.warm_up,
                    args.normalize)

    logging.info(f"Audio saved at {args.out_file}.")
if __name__ == "__main__":
    # Script entry point: raise the log verbosity to INFO first, then parse
    # the command line and hand the resulting namespace to `main`.
    logging.set_verbosity(logging.INFO)
    cli_args = process_args()
    main(cli_args)