pitch shift

MaxMax2016 · MaxMax2016 · commit 605097041228 · 2023-04-03T21:21:33.000+08:00
diff --git a/README.md b/README.md
@@ -89,10 +89,11 @@ https://user-images.githubusercontent.com/16432329/228889388-d7658930-6187-48a8-
 **PS.** 本项目集成了音效算法，你可以使用混响等常见音效
 
 啥？生成的音色不太像！
-```
-待补充~~~
+```text
 1，发音人音域统计
+训练第5步生成：lora_pitch_statics.npy
 2，推理音区偏移
+指定pitch参数：python svc_inference.py --config config/maxgan.yaml --model maxgan_g.pth --spk ./data_svc/lora_speaker.npy --statics ./data_svc/lora_pitch_statics.npy --wave test.wav
 ```
 
 ## 更好的音质
diff --git a/svc_inference.py b/svc_inference.py
@@ -86,6 +86,27 @@ def main(args):
     ppg = torch.FloatTensor(ppg)
 
     pit = compute_f0_nn(args.wave, device)
+    if (args.statics == None):
+        print("don't use pitch shift")
+    else:
+        source = pit[pit > 0]
+        source_ave = source.mean()
+        source_min = source.min()
+        source_max = source.max()
+        print(f"source pitch statics: mean={source_ave:0.1f}, \
+                min={source_min:0.1f}, max={source_max:0.1f}")
+        singer_ave, singer_min, singer_max = np.load(args.statics)
+        print(f"singer pitch statics: mean={singer_ave:0.1f}, \
+                min={singer_min:0.1f}, max={singer_max:0.1f}")
+
+        shift = np.log2(singer_ave/source_ave) * 12
+        if (singer_ave >= source_ave):
+            shift = np.floor(shift)
+        else:
+            shift = np.ceil(shift)
+        shift = 2 ** (shift / 12)
+        pit = pit * shift
+
     pit = torch.FloatTensor(pit)
     pos = torch.LongTensor(pos)
 
@@ -123,6 +144,8 @@ def main(args):
                         help="Path of raw audio.")
     parser.add_argument('-s', '--spk', type=str, required=True,
                         help="Path of speaker.")
+    parser.add_argument('-t', '--statics', type=str,
+                        help="Path of pitch statics.")
     args = parser.parse_args()
 
     main(args)