diff --git a/crates/voicevox_core/src/__internal/interop.rs b/crates/voicevox_core/src/__internal/interop.rs index c8cd7101f..fe46d10bc 100644 --- a/crates/voicevox_core/src/__internal/interop.rs +++ b/crates/voicevox_core/src/__internal/interop.rs @@ -1 +1 @@ -pub use crate::synthesizer::blocking::PerformInference; +pub use crate::{metas::merge as merge_metas, synthesizer::blocking::PerformInference}; diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index 12367dda2..db529e2e5 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -8,13 +8,14 @@ use std::{ use anyhow::bail; use educe::Educe; use enum_map::{Enum as _, EnumMap}; +use indexmap::IndexMap; use itertools::{iproduct, Itertools as _}; use crate::{ error::{ErrorRepr, LoadModelError, LoadModelErrorKind, LoadModelResult}, infer::{InferenceOperation, ParamInfo}, manifest::ModelInnerId, - metas::{SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, + metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, voice_model::{VoiceModelHeader, VoiceModelId}, Result, }; @@ -119,7 +120,7 @@ impl Status { #[derive(Educe)] #[educe(Default(bound = "R: InferenceRuntime, D: InferenceDomain"))] struct LoadedModels( - BTreeMap>, + IndexMap>, ); struct LoadedModel { @@ -130,11 +131,7 @@ struct LoadedModel { impl LoadedModels { fn metas(&self) -> VoiceModelMeta { - self.0 - .values() - .flat_map(|LoadedModel { metas, .. }| metas) - .cloned() - .collect() + metas::merge(self.0.values().flat_map(|LoadedModel { metas, .. 
}| metas)) } fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> { @@ -184,20 +181,29 @@ impl LoadedModels { /// /// # Errors /// - /// 音声モデルIDかスタイルIDが`model_header`と重複するとき、エラーを返す。 + /// 次の場合にエラーを返す。 + /// + /// - 音声モデルIDかスタイルIDが`model_header`と重複するとき fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { - let loaded = self.styles(); - let external = model_header - .metas - .iter() - .flat_map(|speaker| speaker.styles()); - let error = |context| LoadModelError { path: model_header.path.clone(), context, source: None, }; + let loaded = self.speakers(); + let external = model_header.metas.iter(); + for (loaded, external) in iproduct!(loaded, external) { + if loaded.speaker_uuid() == external.speaker_uuid() { + loaded.warn_diff_except_styles(external); + } + } + + let loaded = self.styles(); + let external = model_header + .metas + .iter() + .flat_map(|speaker| speaker.styles()); if self.0.contains_key(&model_header.id) { return Err(error(LoadModelErrorKind::ModelAlreadyLoaded { id: model_header.id.clone(), @@ -242,11 +248,12 @@ impl LoadedModels { Ok(()) } + fn speakers(&self) -> impl Iterator + Clone { + self.0.values().flat_map(|LoadedModel { metas, .. }| metas) + } + fn styles(&self) -> impl Iterator { - self.0 - .values() - .flat_map(|LoadedModel { metas, .. 
}| metas) - .flat_map(|speaker| speaker.styles()) + self.speakers().flat_map(|speaker| speaker.styles()) } } diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 77cb3a9fc..78314d52a 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -1,8 +1,42 @@ -use std::fmt::Display; +use std::fmt::{Debug, Display}; use derive_getters::Getters; use derive_new::new; +use indexmap::IndexMap; +use itertools::Itertools as _; use serde::{Deserialize, Serialize}; +use tracing::warn; + +/// [`speaker_uuid`]をキーとして複数の[`SpeakerMeta`]をマージする。 +/// +/// マージする際話者は[`SpeakerMeta::order`]、スタイルは[`StyleMeta::order`]をもとに安定ソートされる。 +/// `order`が無い話者とスタイルは、そうでないものよりも後ろに置かれる。 +/// +/// [`speaker_uuid`]: SpeakerMeta::speaker_uuid +pub fn merge<'a>(metas: impl IntoIterator) -> Vec { + return metas + .into_iter() + .fold(IndexMap::<_, SpeakerMeta>::new(), |mut acc, speaker| { + acc.entry(&speaker.speaker_uuid) + .and_modify(|acc| acc.styles.extend(speaker.styles.clone())) + .or_insert_with(|| speaker.clone()); + acc + }) + .into_values() + .update(|speaker| { + speaker + .styles + .sort_by_key(|&StyleMeta { order, .. }| key(order)); + }) + .sorted_by_key(|&SpeakerMeta { order, .. 
}| key(order)) + .collect(); + + fn key(order: Option) -> impl Ord { + order + .map(Into::into) + .unwrap_or_else(|| u64::from(u32::MAX) + 1) + } +} /// [`StyleId`]の実体。 /// @@ -15,7 +49,7 @@ pub type RawStyleId = u32; /// /// [**話者**(_speaker_)]: SpeakerMeta /// [**スタイル**(_style_)]: StyleMeta -#[derive(PartialEq, Eq, Clone, Copy, Ord, PartialOrd, Deserialize, Serialize, new, Debug)] +#[derive(PartialEq, Eq, Clone, Copy, Ord, Hash, PartialOrd, Deserialize, Serialize, new, Debug)] pub struct StyleId(RawStyleId); impl StyleId { @@ -65,6 +99,52 @@ pub struct SpeakerMeta { version: StyleVersion, /// 話者のUUID。 speaker_uuid: String, + /// 話者の順番。 + /// + /// `SpeakerMeta`の列は、この値に対して昇順に並んでいるべきである。 + order: Option, +} + +impl SpeakerMeta { + /// # Panics + /// + /// `speaker_uuid`が異なるときパニックする。 + pub(crate) fn warn_diff_except_styles(&self, other: &Self) { + let Self { + name: name1, + styles: _, + version: version1, + speaker_uuid: speaker_uuid1, + order: order1, + } = self; + + let Self { + name: name2, + styles: _, + version: version2, + speaker_uuid: speaker_uuid2, + order: order2, + } = other; + + if speaker_uuid1 != speaker_uuid2 { + panic!("must be equal: {speaker_uuid1} != {speaker_uuid2:?}"); + } + + warn_diff(speaker_uuid1, "name", name1, name2); + warn_diff(speaker_uuid1, "version", version1, version2); + warn_diff(speaker_uuid1, "order", order1, order2); + + fn warn_diff( + speaker_uuid: &str, + field_name: &str, + left: &T, + right: &T, + ) { + if left != right { + warn!("`{speaker_uuid}`: different `{field_name}` ({left:?} != {right:?})"); + } + } + } } /// **スタイル**(_style_)のメタ情報。 @@ -74,4 +154,113 @@ pub struct StyleMeta { id: StyleId, /// スタイル名。 name: String, + /// スタイルの順番。 + /// + /// [`SpeakerMeta::styles`]は、この値に対して昇順に並んでいるべきである。 + order: Option, +} + +#[cfg(test)] +mod tests { + use once_cell::sync::Lazy; + use serde_json::json; + + #[test] + fn merge_works() -> anyhow::Result<()> { + static INPUT: Lazy = Lazy::new(|| { + json!([ + { + "name": "B", + 
"styles": [ + { + "id": 3, + "name": "B_1", + "order": 0 + } + ], + "version": "0.0.0", + "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", + "order": 1 + }, + { + "name": "A", + "styles": [ + { + "id": 2, + "name": "A_3", + "order": 2 + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "order": 0 + }, + { + "name": "A", + "styles": [ + { + "id": 1, + "name": "A_1", + "order": 0 + }, + { + "id": 0, + "name": "A_2", + "order": 1 + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "order": 0 + } + ]) + }); + + static EXPECTED: Lazy = Lazy::new(|| { + json!([ + { + "name": "A", + "styles": [ + { + "id": 1, + "name": "A_1", + "order": 0 + }, + { + "id": 0, + "name": "A_2", + "order": 1 + }, + { + "id": 2, + "name": "A_3", + "order": 2 + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "order": 0 + }, + { + "name": "B", + "styles": [ + { + "id": 3, + "name": "B_1", + "order": 0 + } + ], + "version": "0.0.0", + "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", + "order": 1 + } + ]) + }); + + let input = &serde_json::from_value::>(INPUT.clone())?; + let actual = serde_json::to_value(super::merge(input))?; + + pretty_assertions::assert_eq!(*EXPECTED, actual); + Ok(()) + } } diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 96bf481d1..fc8f4d20f 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -15,7 +15,17 @@ pub type RawVoiceModelId = String; /// 音声モデルID。 #[derive( - PartialEq, Eq, Clone, Ord, PartialOrd, Deserialize, new, Getters, derive_more::Display, Debug, + PartialEq, + Eq, + Clone, + Ord, + Hash, + PartialOrd, + Deserialize, + new, + Getters, + derive_more::Display, + Debug, )] pub struct VoiceModelId { raw_voice_model_id: RawVoiceModelId, diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs 
b/crates/voicevox_core_c_api/src/compatible_engine.rs index 5a6f20f76..6755910f5 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -41,7 +41,9 @@ static VOICE_MODEL_SET: Lazy = Lazy::new(|| { .iter() .map(|vvm| (vvm.id().clone(), vvm.clone())) .collect(); - let metas: Vec<_> = all_vvms.iter().flat_map(|vvm| vvm.metas()).collect(); + let metas = voicevox_core::__internal::interop::merge_metas( + all_vvms.iter().flat_map(|vvm| vvm.metas()), + ); let mut style_model_map = BTreeMap::default(); for vvm in all_vvms.iter() { for meta in vvm.metas().iter() { diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 8f3fa4f3b..25926487e 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -6,37 +6,44 @@ metas = ''' "styles": [ { "id": 0, - "name": "style1" + "name": "style1", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7" + "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", + "order": null }, { "name": "dummy2", "styles": [ { "id": 1, - "name": "style2" + "name": "style2", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905" + "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", + "order": null }, { "name": "dummy3", "styles": [ { "id": 302, - "name": "style3-1" + "name": "style3-1", + "order": null }, { "id": 303, - "name": "style3-2" + "name": "style3-2", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3" + "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", + "order": null } ]''' stderr.windows = ''' @@ -89,37 +96,44 @@ metas = ''' "styles": [ { "id": 0, - "name": "style1" + "name": "style1", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7" + "speaker_uuid": 
"574bc678-8370-44be-b941-08e46e7b47d7", + "order": null }, { "name": "dummy2", "styles": [ { "id": 1, - "name": "style2" + "name": "style2", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905" + "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", + "order": null }, { "name": "dummy3", "styles": [ { "id": 302, - "name": "style3-1" + "name": "style3-1", + "order": null }, { "id": 303, - "name": "style3-2" + "name": "style3-2", + "order": null } ], "version": "0.0.1", - "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3" + "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", + "order": null } ]''' stderr.windows = ''' diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java index 05c1a11b2..ba4881566 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java @@ -4,6 +4,7 @@ import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; /** 音声モデル。 */ public class VoiceModel extends Dll { @@ -68,6 +69,16 @@ public static class SpeakerMeta { @Nonnull public final String version; + /** + * 話者の順番。 + * + *

{@code SpeakerMeta}の列は、この値に対して昇順に並んでいるべきである。 + */ + @SerializedName("order") + @Expose + @Nullable + public final Integer order; + private SpeakerMeta() { // GSONからコンストラクトするため、このメソッドは呼ばれることは無い。 // このメソッドは@Nonnullを満たすために必要。 @@ -75,6 +86,7 @@ private SpeakerMeta() { this.styles = new StyleMeta[0]; this.speakerUuid = ""; this.version = ""; + this.order = null; } } @@ -91,9 +103,20 @@ public static class StyleMeta { @Expose public final int id; + /** + * スタイルの順番。 + * + *

{@link SpeakerMeta#styles}の列は、この値に対して昇順に並んでいるべきである。 + */ + @SerializedName("order") + @Expose + @Nullable + public final Integer order; + private StyleMeta() { this.name = ""; this.id = 0; + this.order = null; } } } diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_models.py b/crates/voicevox_core_python_api/python/voicevox_core/_models.py index 195154629..c72bbcbf0 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_models.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/_models.py @@ -44,6 +44,13 @@ class StyleMeta: id: StyleId """スタイルID。""" + order: Optional[int] = None + """ + スタイルの順番。 + + :attr:`SpeakerMeta.styles` は、この値に対して昇順に並んでいるべきである。 + """ + @pydantic.dataclasses.dataclass class SpeakerMeta: @@ -61,6 +68,13 @@ class SpeakerMeta: version: StyleVersion """話者のUUID。""" + order: Optional[int] = None + """ + 話者の順番。 + + ``SpeakerMeta`` の列は、この値に対して昇順に並んでいるべきである。 + """ + @pydantic.dataclasses.dataclass class SupportedDevices: