[release-0.19] mainブランチをrelease-0.19にマージ (#2036)

## 内容

いくつかのhotfixを適用した今のmainブランチをrelease-0.19ブランチにマージします。
（FF mergeなのでそのままやってもよいのですが、記録に残すという意味も兼ねて）

## その他
VOICEVOX · Apr 28, 2024 · 66d0f4a · 66d0f4a
2 parents fb0adce + 2106181
commit 66d0f4a
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 23 deletions.
diff --git a/public/howtouse.md b/public/howtouse.md
@@ -323,7 +323,7 @@ VOICEVOX では、歌声合成機能がプロトタイプ版として提供さ
 
 ### ピッチ編集
 
-「設定」の「実験的機能」から「ソング：ピッチ編集機能」をONにすることで、歌の音程を細かく制御することができます。
+「設定」→「オプション」→「実験的機能」から「ソング：ピッチ編集機能」をONにすることで、歌の音程を細かく制御することができます。
 
 ### ソング機能のよくある質問
 

diff --git a/src/components/Sing/SequencerPitch.vue b/src/components/Sing/SequencerPitch.vue
@@ -47,7 +47,7 @@ const props = defineProps<{
     | { type: "erase"; startFrame: number; frameLength: number };
 }>();
 
-const { warn } = createLogger("SequencerPitch");
+const { warn, error } = createLogger("SequencerPitch");
 const store = useStore();
 const singingGuides = computed(() => [...store.state.singingGuides.values()]);
 const pitchEditData = computed(() => {
@@ -274,29 +274,36 @@ const updateOriginalPitchDataSectionMap = async () => {
       throw new Error("phonemes.length is 0.");
     }
     const f0 = singingGuide.query.f0;
-    const startTime = singingGuide.startTime;
-    const startFrame = Math.round(startTime * frameRate);
-    const endFrame = startFrame + f0.length;
 
     // 各フレームの音素の配列を生成する
     const framePhonemes = convertToFramePhonemes(phonemes);
     if (f0.length !== framePhonemes.length) {
       throw new Error("f0.length and framePhonemes.length do not match.");
     }
 
+    // 歌い方の開始フレームと終了フレームを計算する
+    const singingGuideFrameLength = f0.length;
+    const singingGuideStartFrame = Math.round(
+      singingGuide.startTime * frameRate,
+    );
+    const singingGuideEndFrame =
+      singingGuideStartFrame + singingGuideFrameLength;
+
     // 無声子音区間以外のf0をtempDataにコピーする
     // NOTE: 無声子音区間は音程が無く、f0の値が大きく上下するので表示しない
-    if (tempData.length < endFrame) {
-      const valuesToPush = new Array(endFrame - tempData.length).fill(
-        VALUE_INDICATING_NO_DATA,
-      );
+    if (tempData.length < singingGuideEndFrame) {
+      const valuesToPush = new Array(
+        singingGuideEndFrame - tempData.length,
+      ).fill(VALUE_INDICATING_NO_DATA);
       tempData.push(...valuesToPush);
     }
-    for (let i = 0; i < f0.length; i++) {
-      const phoneme = framePhonemes[i];
+    const startFrame = Math.max(0, singingGuideStartFrame);
+    const endFrame = singingGuideEndFrame;
+    for (let i = startFrame; i < endFrame; i++) {
+      const phoneme = framePhonemes[i - singingGuideStartFrame];
       const unvoiced = unvoicedPhonemes.includes(phoneme);
       if (!unvoiced) {
-        tempData[startFrame + i] = f0[i];
+        tempData[i] = f0[i - singingGuideStartFrame];
       }
     }
   }
@@ -425,6 +432,13 @@ onMountedOrActivated(() => {
   });
   stage = new PIXI.Container();
 
+  // webGLVersionをチェックする
+  // 2未満の場合、ピッチの表示ができないのでエラーとしてロギングする
+  const webGLVersion = renderer.context.webGLVersion;
+  if (webGLVersion < 2) {
+    error(`webGLVersion is less than 2. webGLVersion: ${webGLVersion}`);
+  }
+
   const callback = () => {
     if (renderInNextFrame) {
       render();

diff --git a/src/components/Sing/ToolBar/EditTargetSwicher.vue b/src/components/Sing/ToolBar/EditTargetSwicher.vue
@@ -22,13 +22,14 @@
     class="margin-right"
     @click="editTarget !== 'PITCH' && changeEditTarget('PITCH')"
     ><QTooltip :delay="500" anchor="bottom middle"
-      >ピッチ編集<br />Ctrl+クリックで消去</QTooltip
+      >ピッチ編集<br />{{ !isMac ? "Ctrl" : "Cmd" }}+クリックで消去</QTooltip
     ></QBtn
   >
 </template>
 
 <script setup lang="ts">
 import { SequencerEditTarget } from "@/store/type";
+import { isMac } from "@/type/preload";
 
 defineProps<{
   editTarget: SequencerEditTarget;

diff --git a/src/sing/domain.ts b/src/sing/domain.ts
@@ -443,15 +443,21 @@ export function applyPitchEdit(
     throw new Error("f0.length and framePhonemes.length do not match.");
   }
 
-  const startFrame = Math.round(
+  // 歌い方の開始フレームと終了フレームを計算する
+  const singingGuideFrameLength = f0.length;
+  const singingGuideStartFrame = Math.round(
     singingGuide.startTime * singingGuide.frameRate,
   );
-  const endFrame = Math.min(startFrame + f0.length, pitchEditData.length);
+  const singingGuideEndFrame = singingGuideStartFrame + singingGuideFrameLength;
+
+  // ピッチ編集をf0に適用する
+  const startFrame = Math.max(0, singingGuideStartFrame);
+  const endFrame = Math.min(pitchEditData.length, singingGuideEndFrame);
   for (let i = startFrame; i < endFrame; i++) {
-    const phoneme = framePhonemes[i - startFrame];
+    const phoneme = framePhonemes[i - singingGuideStartFrame];
     const voiced = !unvoicedPhonemes.includes(phoneme);
     if (voiced && pitchEditData[i] !== VALUE_INDICATING_NO_DATA) {
-      f0[i - startFrame] = pitchEditData[i];
+      f0[i - singingGuideStartFrame] = pitchEditData[i];
     }
   }
 }

diff --git a/src/store/singing.ts b/src/store/singing.ts
@@ -1338,8 +1338,8 @@ export const singingStore = createPartialStore<SingingStoreTypes>({
                   `Fetched frame audio query. Phonemes are "${phonemes}".`,
                 );
 
+                // 音域調整を適用する
                 shiftGuidePitch(keyRangeAdjustment, query);
-                scaleGuideVolume(volumeRangeAdjustment, query);
 
                 const startTime = calcStartTime(
                   phrase.notes,
@@ -1363,9 +1363,12 @@ export const singingStore = createPartialStore<SingingStoreTypes>({
               });
             }
 
-            // 歌い方をコピーして、ピッチ編集を適用する
+            // ピッチ編集を適用する前に、歌い方をコピーする
 
             singingGuide = structuredClone(toRaw(singingGuide));
+
+            // ピッチ編集を適用する
+
             applyPitchEdit(singingGuide, pitchEditData, editFrameRate);
 
             // 歌声のキャッシュがあれば取得し、なければ音声合成を行う
@@ -1385,24 +1388,31 @@ export const singingStore = createPartialStore<SingingStoreTypes>({
 
               logger.info(`Loaded singing voice from cache.`);
             } else {
-              // ピッチ編集を適用したクエリから音量を作る
+              // 音量生成用のクエリを作る
+              // ピッチ編集を適用したクエリをコピーし、
+              // f0をもう一度シフトして、f0生成時の（シフトする前の）高さに戻す
+              const queryForVolume = structuredClone(singingGuide.query);
+              shiftGuidePitch(-keyRangeAdjustment, queryForVolume);
+
+              // 音量生成用のクエリから音量を作る
               // 音量値はAPIを叩く毎に変わるので、calc hashしたあとに音量を取得している
               const notesForRequestToEngine = createNotesForRequestToEngine(
                 phrase.notes,
                 tempos,
                 tpqn,
-                keyRangeAdjustment,
+                keyRangeAdjustment, // f0を生成するときと同様に、noteのkeyのシフトを行う
                 singingGuide.frameRate,
                 restDurationSeconds,
               );
-
               const volumes = await dispatch("FETCH_SING_FRAME_VOLUME", {
                 notes: notesForRequestToEngine,
-                frameAudioQuery: singingGuide.query,
+                frameAudioQuery: queryForVolume,
                 styleId: singingTeacherStyleId,
                 engineId: singerAndFrameRate.singer.engineId,
               });
               singingGuide.query.volume = volumes;
+
+              // 声量調整を適用する
               scaleGuideVolume(volumeRangeAdjustment, singingGuide.query);
 
               const blob = await synthesize(