Update pre-trained model, clean up the code

keonlee9420 · Jul 24, 2021 · d53992c
1 parent 919d196
commit d53992c
Show file tree

Hide file tree

Showing 7 changed files with 9 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -16,11 +16,11 @@ pip3 install -r requirements.txt
 
 ## Inference
 
-You have to download the [pretrained models]() and put them in ``output/ckpt/LJSpeech/``.
+You have to download the [pretrained models](https://drive.google.com/drive/folders/1-zgh0ltDHfjYT0i5xsNCjg-MsDDrhga7?usp=sharing) and put them in ``output/ckpt/LJSpeech/``.
 
 For English single-speaker TTS, run
 ```
-python3 synthesize.py --text "YOUR_DESIRED_TEXT" --restore_step 900000 --mode single -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
+python3 synthesize.py --text "YOUR_DESIRED_TEXT" --restore_step RESTORE_STEP --mode single -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
 ```
 The generated utterances will be put in ``output/result/``.
 
@@ -29,7 +29,7 @@ The generated utterances will be put in ``output/result/``.
 Batch inference is also supported, try
 
 ```
-python3 synthesize.py --source preprocessed_data/LJSpeech/val.txt --restore_step 900000 --mode batch -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
+python3 synthesize.py --source preprocessed_data/LJSpeech/val.txt --restore_step RESTORE_STEP --mode batch -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
 ```
 to synthesize all utterances in ``preprocessed_data/LJSpeech/val.txt``
 
@@ -68,11 +68,11 @@ tensorboard --logdir output/log/LJSpeech
 ```
 
 to serve TensorBoard on your localhost.
-<!-- The loss curves, synthesized mel-spectrograms, and audios are shown.
+The loss curves, synthesized mel-spectrograms, and audios are shown.
 
 ![](./img/tensorboard_loss.png)
 ![](./img/tensorboard_spec.png)
-![](./img/tensorboard_audio.png) -->
+![](./img/tensorboard_audio.png)
 
 # Implementation Issues
 

diff --git a/config/LJSpeech/preprocess.yaml b/config/LJSpeech/preprocess.yaml
@@ -3,7 +3,7 @@ dataset: "LJSpeech"
 path:
   corpus_path: "/mnt/nfs2/speech-datasets/en/LJSpeech-1.1"
   lexicon_path: "lexicon/librispeech-lexicon.txt"
-  raw_path: "/ssd2/implementations/raw_data/LJSpeech_22k"
+  raw_path: "./raw_data/LJSpeech"
   preprocessed_path: "./preprocessed_data/LJSpeech"
 
 preprocessing:

diff --git a/config/LJSpeech/train.yaml b/config/LJSpeech/train.yaml
@@ -1,7 +1,7 @@
 path:
-  ckpt_path: "./output/ckpt/LJSpeech_pr"
-  log_path: "./output/log/LJSpeech_pr"
-  result_path: "./output/result/LJSpeech_pr"
+  ckpt_path: "./output/ckpt/LJSpeech"
+  log_path: "./output/log/LJSpeech"
+  result_path: "./output/result/LJSpeech"
 optimizer:
   batch_size: 32
   betas: [0.9, 0.999]

diff --git a/img/tensorboard_audio.png b/img/tensorboard_audio.png
diff --git a/img/tensorboard_loss.png b/img/tensorboard_loss.png
diff --git a/img/tensorboard_spec.png b/img/tensorboard_spec.png
diff --git a/utils/tools.py b/utils/tools.py
@@ -256,7 +256,6 @@ def plot_mel(data, titles, save_dir=None):
 
     for i in range(len(data)):
         mel = data[i]
-        print(mel.shape)
         axes[i][0].imshow(mel, origin="lower")
         axes[i][0].set_aspect(2.5, adjustable="box")
         axes[i][0].set_ylim(0, mel.shape[0])