From c6e093561b50c08b9dba6b9f0222e9bf305520f6 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Tue, 27 Aug 2024 01:00:20 +0900 Subject: [PATCH 1/9] change: liberate VOICEVOX CORE --- .github/workflows/build_and_deploy.yml | 81 ++----------------- .../workflows/build_and_deploy_downloader.yml | 7 +- .github/workflows/test.yml | 11 +-- Cargo.lock | 4 +- Cargo.toml | 4 +- crates/voicevox_core/src/devices.rs | 7 +- crates/voicevox_core/src/infer.rs | 8 +- crates/voicevox_core/src/infer/model_file.rs | 5 -- .../src/infer/runtimes/onnxruntime.rs | 37 +++++---- crates/voicevox_core/src/infer/session_set.rs | 8 +- crates/voicevox_core/src/lib.rs | 2 +- crates/voicevox_core/src/manifest.rs | 13 ++- crates/voicevox_core/src/synthesizer.rs | 10 +-- crates/voicevox_core/src/voice_model.rs | 81 ++++++++++++------- .../src/compatible_engine.rs | 13 +++ crates/voicevox_core_c_api/src/lib.rs | 2 +- .../voicevox_core_c_api/tests/e2e/log_mask.rs | 19 ++++- .../tests/e2e/snapshots.toml | 18 ++--- .../tests/e2e/testcases/compatible_engine.rs | 2 +- ...ble_engine_load_model_before_initialize.rs | 2 +- .../tests/e2e/testcases/global_info.rs | 36 +++++++-- .../tests/e2e/testcases/simple_tts.rs | 37 +++++++-- .../testcases/synthesizer_new_output_json.rs | 30 +++++-- .../e2e/testcases/tts_via_audio_query.rs | 37 +++++++-- .../tests/e2e/testcases/user_dict_load.rs | 30 +++++-- .../e2e/testcases/user_dict_manipulate.rs | 2 +- .../hiroshiba/voicevoxcore/Onnxruntime.java | 2 +- .../jp/hiroshiba/voicevoxcore/TestUtils.java | 4 +- .../python/test/conftest.py | 4 +- .../python/voicevox_core/_rust/asyncio.pyi | 2 +- .../python/voicevox_core/_rust/blocking.pyi | 2 +- model/sample.vvm/manifest.json | 15 +++- 32 files changed, 304 insertions(+), 231 deletions(-) delete mode 100644 crates/voicevox_core/src/infer/model_file.rs diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 9283b273d..4f55d14b5 100644 --- a/.github/workflows/build_and_deploy.yml +++ 
b/.github/workflows/build_and_deploy.yml @@ -16,11 +16,6 @@ on: type: boolean required: false default: false - is_production: - description: "製品版をビルドする" - type: boolean - required: false - default: false release: types: - published @@ -158,7 +153,7 @@ jobs: build_and_deploy: needs: config - environment: ${{ inputs.is_production && 'production' || '' }} # 製品版のenvironment + environment: ${{ inputs.code_signing && 'production' || '' }} # コード署名用のenvironment strategy: matrix: include: ${{ fromJson(needs.config.outputs.includes) }} @@ -166,22 +161,7 @@ jobs: env: ASSET_NAME: voicevox_core-${{ matrix.artifact_name }}-${{ needs.config.outputs.version }} steps: - - uses: actions/checkout@v4 # 製品版ではない場合 - if: ${{ !inputs.is_production }} - - uses: actions/checkout@v4 # 製品版の場合 - if: inputs.is_production - with: - fetch-depth: 0 # 全履歴取得 - token: ${{ secrets.PRODUCTION_GITHUB_TOKEN }} - - name: Merge production branch - if: inputs.is_production - shell: bash - run: | - ( - git remote add private ${{ secrets.PRODUCTION_REPOSITORY_URL }} - git fetch private refs/tags/${{ env.PRODUCTION_REPOSITORY_TAG }} - git -c user.name=dummy -c user.email=dummy@dummy.dummy merge FETCH_HEAD - ) > /dev/null 2>&1 + - uses: actions/checkout@v4 - name: Set up Python 3.8 if: matrix.python_whl uses: actions/setup-python@v5 @@ -206,18 +186,6 @@ jobs: run: | echo "$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin" >> "$GITHUB_PATH" echo "AR_${{ matrix.target }}=llvm-ar" >> "$GITHUB_ENV" - - name: Checkout VOICEVOX RESOURCE - if: inputs.is_production - uses: actions/checkout@v4 - with: - repository: VOICEVOX/voicevox_resource - ref: ${{ env.VOICEVOX_RESOURCE_VERSION }} - path: download/resource - - name: Raplace resource - if: inputs.is_production - shell: bash - run: - mv -f download/resource/core/README.md ./README.md - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall - name: Install cargo-edit @@ -228,7 +196,6 @@ jobs: if ${{ matrix.python_whl }}; then cargo set-version 
"$VERSION" -p voicevox_core_python_api; fi - name: cache target uses: Swatinem/rust-cache@v2 - if: ${{ !inputs.is_production }} - name: build voicevox_core_c_api shell: bash run: | @@ -236,14 +203,7 @@ jobs: plain-cdylib) linking=load-onnxruntime ;; ios-xcframework) linking=link-onnxruntime ;; esac - function build() { - cargo build -p voicevox_core_c_api -vv --features "$linking" --target ${{ matrix.target }} --release - } - if ${{ !inputs.is_production }}; then - build - else - build > /dev/null 2>&1 - fi + cargo build -p voicevox_core_c_api -vv --features "$linking" --target ${{ matrix.target }} --release env: RUSTFLAGS: -C panic=abort - name: build voicevox_core_python_api @@ -254,26 +214,11 @@ jobs: pip install --upgrade poetry poetry config virtualenvs.create false (cd crates/voicevox_core_python_api && poetry install --with dev) - function build() { - maturin build --manifest-path ./crates/voicevox_core_python_api/Cargo.toml --target ${{ matrix.target }} --release - } - if ${{ !inputs.is_production }}; then - build - else - build > /dev/null 2>&1 - fi + maturin build --manifest-path ./crates/voicevox_core_python_api/Cargo.toml --target ${{ matrix.target }} --release echo "whl=$(find ./target/wheels -type f)" >> "$GITHUB_OUTPUT" - name: build voicevox_core_java_api if: contains(matrix.target, 'android') - run: | - function build() { - cargo build -p voicevox_core_java_api -vv --target ${{ matrix.target }} --release - } - if ${{ !inputs.is_production }}; then - build - else - build > /dev/null 2>&1 - fi + run: cargo build -p voicevox_core_java_api -vv --target ${{ matrix.target }} --release - name: Organize artifact run: | mkdir -p "artifact/${{ env.ASSET_NAME }}" @@ -377,25 +322,13 @@ jobs: ${{ env.ASSET_NAME }}.zip target_commitish: ${{ github.sha }} - deploy_model: + deploy_sample_model: runs-on: ubuntu-latest needs: config env: - ASSET_NAME: model-${{ needs.config.outputs.version }} + ASSET_NAME: sample-model-${{ needs.config.outputs.version }} steps: - 
uses: actions/checkout@v4 - - name: Checkout VOICEVOX FAT RESOURCE - if: inputs.is_production - uses: actions/checkout@v4 - with: - repository: VOICEVOX/voicevox_fat_resource - ref: ${{ env.VOICEVOX_FAT_RESOURCE_VERSION }} - path: download/fat_resource - - name: Raplace resource - if: inputs.is_production - shell: bash - run: - rm -r ./model; mv download/fat_resource/core/model ./model - name: Create artifact run: | mkdir "artifact" diff --git a/.github/workflows/build_and_deploy_downloader.yml b/.github/workflows/build_and_deploy_downloader.yml index 20fd4d63a..256e17bd6 100644 --- a/.github/workflows/build_and_deploy_downloader.yml +++ b/.github/workflows/build_and_deploy_downloader.yml @@ -11,11 +11,6 @@ on: type: boolean required: false default: false - is_production: - description: "製品版をビルドする" - type: boolean - required: false - default: false release: types: - published @@ -42,7 +37,7 @@ defaults: jobs: deploy_and_deploy_downloader: - environment: ${{ inputs.is_production && 'production' || '' }} # コード署名用のenvironment + environment: ${{ inputs.code_signing && 'production' || '' }} # コード署名用のenvironment strategy: matrix: include: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d84d16442..f4c4d6ab1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -284,17 +284,12 @@ jobs: - run: cargo build -p test_util -vv # build scriptにより/crates/test_util/data/の生成 - run: poetry run maturin build --locked - run: poetry run maturin develop --locked - - name: 必要なDLLをコピーしてpytestを実行 - run: | - cp -v ../../target/debug/onnxruntime.dll . || true - cp -v ../../target/debug/libonnxruntime.so.* . || true - cp -v ../../target/debug/libonnxruntime.*.dylib . 
|| true - - poetry run pytest + - name: pytestを実行 + run: poetry run pytest - name: Exampleを実行 run: | for file in ../../example/python/run{,-asyncio}.py; do - poetry run python "$file" ../test_util/data/model/sample.vvm --dict-dir ../test_util/data/open_jtalk_dic_utf_8-1.11 + poetry run python "$file" ../test_util/data/model/sample.vvm --dict-dir ../test_util/data/open_jtalk_dic_utf_8-1.11 --onnxruntime ../test_util/data/lib/*onnxruntime* done build-and-test-java-api: strategy: diff --git a/Cargo.lock b/Cargo.lock index 7d70c15b8..277938b89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4212,7 +4212,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "voicevox-ort" version = "2.0.0-rc.4" -source = "git+https://github.com/VOICEVOX/ort.git?rev=8627833456a69e7841ae2a29fd184752df8de8d9#8627833456a69e7841ae2a29fd184752df8de8d9" +source = "git+https://github.com/qryxip/ort.git?rev=16a0601123804e5b281df251c3c2461abe222cc1#16a0601123804e5b281df251c3c2461abe222cc1" dependencies = [ "anyhow", "half", @@ -4229,7 +4229,7 @@ dependencies = [ [[package]] name = "voicevox-ort-sys" version = "2.0.0-rc.4" -source = "git+https://github.com/VOICEVOX/ort.git?rev=8627833456a69e7841ae2a29fd184752df8de8d9#8627833456a69e7841ae2a29fd184752df8de8d9" +source = "git+https://github.com/qryxip/ort.git?rev=16a0601123804e5b281df251c3c2461abe222cc1#16a0601123804e5b281df251c3c2461abe222cc1" dependencies = [ "flate2", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index d72625c5f..e75379803 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,8 +91,8 @@ windows = "0.43.0" zip = "0.6.3" [workspace.dependencies.voicevox-ort] -git = "https://github.com/VOICEVOX/ort.git" -rev = "8627833456a69e7841ae2a29fd184752df8de8d9" +git = "https://github.com/qryxip/ort.git" +rev = "16a0601123804e5b281df251c3c2461abe222cc1" [workspace.dependencies.open_jtalk] git = "https://github.com/VOICEVOX/open_jtalk-rs.git" diff --git a/crates/voicevox_core/src/devices.rs 
b/crates/voicevox_core/src/devices.rs index f3027e741..6262b60df 100644 --- a/crates/voicevox_core/src/devices.rs +++ b/crates/voicevox_core/src/devices.rs @@ -57,12 +57,7 @@ fn test_gpu( /// use voicevox_core::{tokio::Onnxruntime, SupportedDevices}; /// /// # voicevox_core::blocking::Onnxruntime::load_once() -/// # .filename(if cfg!(windows) { -/// # // Windows\System32\onnxruntime.dllを回避 -/// # test_util::ONNXRUNTIME_DYLIB_PATH -/// # } else { -/// # voicevox_core::blocking::Onnxruntime::LIB_VERSIONED_FILENAME -/// # }) +/// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) /// # .exec()?; /// # /// let onnxruntime = Onnxruntime::get().unwrap(); diff --git a/crates/voicevox_core/src/infer.rs b/crates/voicevox_core/src/infer.rs index 112ca6b53..52ece3f91 100644 --- a/crates/voicevox_core/src/infer.rs +++ b/crates/voicevox_core/src/infer.rs @@ -1,5 +1,4 @@ pub(crate) mod domains; -mod model_file; pub(crate) mod runtimes; pub(crate) mod session_set; @@ -13,6 +12,7 @@ use thiserror::Error; use crate::{ devices::{DeviceSpec, GpuSpec}, + voice_model::ModelBytes, StyleType, SupportedDevices, }; @@ -33,7 +33,7 @@ pub(crate) trait InferenceRuntime: 'static { #[allow(clippy::type_complexity)] fn new_session( &self, - model: impl FnOnce() -> std::result::Result, DecryptModelError>, + model: &ModelBytes, options: InferenceSessionOptions, ) -> anyhow::Result<( Self::Session, @@ -204,7 +204,3 @@ pub(crate) enum ExtractError { #[error(transparent)] Shape(#[from] ShapeError), } - -#[derive(Error, Debug)] -#[error("不正なモデルファイルです")] -pub(crate) struct DecryptModelError; diff --git a/crates/voicevox_core/src/infer/model_file.rs b/crates/voicevox_core/src/infer/model_file.rs deleted file mode 100644 index 470ce9a6b..000000000 --- a/crates/voicevox_core/src/infer/model_file.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::DecryptModelError; - -pub(super) fn decrypt(content: &[u8]) -> std::result::Result, DecryptModelError> { - Ok(content.to_owned()) -} diff --git 
a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs index 7d975f7f7..06c3dabc6 100644 --- a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs +++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs @@ -11,11 +11,12 @@ use ort::{ use crate::{ devices::{DeviceSpec, GpuSpec, SupportedDevices}, error::ErrorRepr, + voice_model::ModelBytes, }; use super::super::{ - DecryptModelError, InferenceRuntime, InferenceSessionOptions, InputScalarKind, - OutputScalarKind, OutputTensor, ParamInfo, PushInputTensor, + InferenceRuntime, InferenceSessionOptions, InputScalarKind, OutputScalarKind, OutputTensor, + ParamInfo, PushInputTensor, }; // TODO: `trait AsyncRuntime`みたいなものを作って抽象化しながら同期版と非同期版に別個の役割を @@ -62,7 +63,7 @@ impl InferenceRuntime for self::blocking::Onnxruntime { fn new_session( &self, - model: impl FnOnce() -> std::result::Result, DecryptModelError>, + model: &ModelBytes, options: InferenceSessionOptions, ) -> anyhow::Result<( Self::Session, @@ -86,8 +87,10 @@ impl InferenceRuntime for self::blocking::Onnxruntime { } }; - let model = model()?; - let sess = builder.commit_from_memory(&{ model })?; + let sess = match model { + ModelBytes::Onnx(onnx) => builder.commit_from_memory(onnx), + ModelBytes::Bin(bin) => builder.commit_from_vv_bin(bin), + }?; let input_param_infos = sess .inputs @@ -261,12 +264,10 @@ pub(crate) mod blocking { /// # use voicevox_core as another_lib; /// # /// # fn main() -> anyhow::Result<()> { - /// # if cfg!(windows) { - /// # // Windows\System32\onnxruntime.dllを回避 - /// # voicevox_core::blocking::Onnxruntime::load_once() - /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) - /// # .exec()?; - /// # } + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # /// let ort1 = voicevox_core::blocking::Onnxruntime::load_once().exec()?; /// let ort2 = 
another_lib::tokio::Onnxruntime::get().expect("`ort1`と同一のはず"); /// assert_eq!(ptr_addr(ort1), ptr_addr(ort2)); @@ -289,7 +290,7 @@ pub(crate) mod blocking { /// ONNX Runtimeのライブラリ名。 #[cfg(feature = "load-onnxruntime")] #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] - pub const LIB_NAME: &'static str = "onnxruntime"; + pub const LIB_NAME: &'static str = "voicevox_onnxruntime"; /// 推奨されるONNX Runtimeのバージョン。 #[cfg(feature = "load-onnxruntime")] @@ -450,12 +451,10 @@ pub(crate) mod tokio { /// # /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { - /// # if cfg!(windows) { - /// # // Windows\System32\onnxruntime.dllを回避 - /// # voicevox_core::blocking::Onnxruntime::load_once() - /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) - /// # .exec()?; - /// # } + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # /// let ort1 = voicevox_core::tokio::Onnxruntime::load_once().exec().await?; /// let ort2 = another_lib::blocking::Onnxruntime::get().expect("`ort1`と同一のはず"); /// assert_eq!(ptr_addr(ort1), ptr_addr(ort2)); @@ -477,7 +476,7 @@ pub(crate) mod tokio { #[cfg(feature = "load-onnxruntime")] #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] // ブロッキング版と等しいことはテストで担保 - pub const LIB_NAME: &'static str = "onnxruntime"; + pub const LIB_NAME: &'static str = "voicevox_onnxruntime"; /// 推奨されるONNX Runtimeのバージョン。 #[cfg(feature = "load-onnxruntime")] diff --git a/crates/voicevox_core/src/infer/session_set.rs b/crates/voicevox_core/src/infer/session_set.rs index e94fff962..03af11864 100644 --- a/crates/voicevox_core/src/infer/session_set.rs +++ b/crates/voicevox_core/src/infer/session_set.rs @@ -4,10 +4,10 @@ use anyhow::bail; use enum_map::{Enum as _, EnumMap}; use itertools::Itertools as _; -use crate::error::ErrorRepr; +use crate::{error::ErrorRepr, voice_model::ModelBytes}; use super::{ - model_file, InferenceDomain, InferenceInputSignature, InferenceOperation, 
InferenceRuntime, + InferenceDomain, InferenceInputSignature, InferenceOperation, InferenceRuntime, InferenceSessionOptions, InferenceSignature, ParamInfo, }; @@ -18,7 +18,7 @@ pub(crate) struct InferenceSessionSet( impl InferenceSessionSet { pub(crate) fn new( rt: &R, - model_bytes: &EnumMap>, + model_bytes: &EnumMap, options: &EnumMap, ) -> anyhow::Result { let mut sessions = model_bytes @@ -28,7 +28,7 @@ impl InferenceSessionSet { ::PARAM_INFOS[op]; let (sess, actual_input_param_infos, actual_output_param_infos) = - rt.new_session(|| model_file::decrypt(model_bytes), options[op])?; + rt.new_session(model_bytes, options[op])?; check_param_infos(expected_input_param_infos, &actual_input_param_infos)?; check_param_infos(expected_output_param_infos, &actual_output_param_infos)?; diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index fedf538cf..8377f2c4e 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -10,7 +10,7 @@ //! 開きます。[CUDA]と[DirectML]が利用できます。 //! - **`link-onnxruntime`**: ONNX Runtimeをロード時動的リンクします。iOSのような`dlopen`の利用が //! 困難な環境でのみこちらを利用するべきです。_Note_: -//! [動的リンク対象のライブラリ名]は`onnxruntime`で固定です。変更 +//! [動的リンク対象のライブラリ名]は`voicevox_onnxruntime`で固定です。変更 //! は`patchelf(1)`や`install_name_tool(1)`で行ってください。また、[ONNX RuntimeのGPU機能]を使う //! ことはできません。 //! 
diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs index 0808a6414..ae062a7c1 100644 --- a/crates/voicevox_core/src/manifest.rs +++ b/crates/voicevox_core/src/manifest.rs @@ -82,13 +82,20 @@ pub(crate) struct ManifestDomains { #[derive(Deserialize, Clone)] pub(crate) struct TalkManifest { - pub(crate) predict_duration_filename: String, - pub(crate) predict_intonation_filename: String, - pub(crate) decode_filename: String, + pub(crate) predict_duration: ModelFile, + pub(crate) predict_intonation: ModelFile, + pub(crate) decode: ModelFile, #[serde(default)] pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId, } +#[derive(Deserialize, Clone)] +#[serde(tag = "type", rename_all = "lowercase")] +pub(crate) enum ModelFile { + Onnx { filename: String }, + Bin { filename: String }, +} + #[serde_as] #[derive(Default, Clone, Deref, Deserialize)] #[deref(forward)] diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index 8dcbf848a..00cb0a2de 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -126,12 +126,10 @@ pub(crate) mod blocking { /// AccelerationMode, InitializeOptions, /// }; /// - /// # if cfg!(windows) { - /// # // Windows\System32\onnxruntime.dllを回避 - /// # voicevox_core::blocking::Onnxruntime::load_once() - /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) - /// # .exec()?; - /// # } + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # /// let mut syntesizer = Synthesizer::new( /// Onnxruntime::load_once().exec().await?, /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap()), diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 48477256c..4b8e4fadd 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -27,7 +27,7 @@ use std::path::{Path, 
PathBuf}; pub type RawVoiceModelId = Uuid; pub(crate) type ModelBytesWithInnerVoiceIdsByDomain = - (Option<(StyleIdToInnerVoiceId, EnumMap>)>,); + (Option<(StyleIdToInnerVoiceId, EnumMap)>,); /// 音声モデルID。 #[derive( @@ -98,6 +98,11 @@ impl VoiceModelHeader { } } +pub(crate) enum ModelBytes { + Onnx(Vec), + Bin(Vec), +} + impl ManifestDomains { /// manifestとして対応していない`StyleType`に対してエラーを発する。 /// @@ -156,11 +161,11 @@ pub(crate) mod blocking { use crate::{ error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, infer::domains::InferenceDomainMap, - manifest::{Manifest, TalkManifest}, + manifest::{Manifest, ModelFile, TalkManifest}, VoiceModelMeta, }; - use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; + use super::{ModelBytes, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// @@ -184,21 +189,17 @@ pub(crate) mod blocking { .as_ref() .map( |TalkManifest { - predict_duration_filename, - predict_intonation_filename, - decode_filename, + predict_duration, + predict_intonation, + decode, style_id_to_inner_voice_id, }| { - let model_bytes = [ - predict_duration_filename, - predict_intonation_filename, - decode_filename, - ] - .into_par_iter() - .map(|filename| reader.read_vvm_entry(filename)) - .collect::, _>>()? - .try_into() - .unwrap_or_else(|_| panic!("should be same length")); + let model_bytes = [predict_duration, predict_intonation, decode] + .into_par_iter() + .map(|entry| reader.read_model_bytes(entry)) + .collect::, _>>()? 
+ .try_into() + .unwrap_or_else(|_| panic!("should be same length")); let model_bytes = EnumMap::from_array(model_bytes); @@ -269,6 +270,13 @@ pub(crate) mod blocking { }) } + fn read_model_bytes(&self, entry: &ModelFile) -> LoadModelResult { + match entry { + ModelFile::Onnx { filename } => self.read_vvm_entry(filename).map(ModelBytes::Onnx), + ModelFile::Bin { filename } => self.read_vvm_entry(filename).map(ModelBytes::Bin), + } + } + fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { (|| { let mut reader = self.borrow_reader().clone(); @@ -304,11 +312,11 @@ pub(crate) mod tokio { use crate::{ error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, infer::domains::InferenceDomainMap, - manifest::{Manifest, TalkManifest}, + manifest::{Manifest, ModelFile, TalkManifest}, Result, VoiceModelMeta, }; - use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; + use super::{ModelBytes, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// @@ -326,9 +334,9 @@ pub(crate) mod tokio { let talk = OptionFuture::from(self.header.manifest.domains().talk.as_ref().map( |TalkManifest { - predict_duration_filename, - predict_intonation_filename, - decode_filename, + predict_duration, + predict_intonation, + decode, style_id_to_inner_voice_id, }| async { let ( @@ -336,9 +344,9 @@ pub(crate) mod tokio { predict_duration_model_result, predict_intonation_model_result, ) = join3( - reader.read_vvm_entry(decode_filename), - reader.read_vvm_entry(predict_duration_filename), - reader.read_vvm_entry(predict_intonation_filename), + reader.read_model_bytes(decode), + reader.read_model_bytes(predict_duration), + reader.read_model_bytes(predict_intonation), ) .await; @@ -429,6 +437,17 @@ pub(crate) mod tokio { }) } + async fn read_model_bytes(&self, entry: &ModelFile) -> LoadModelResult { + match entry { + ModelFile::Onnx { filename } => { + self.read_vvm_entry(filename).await.map(ModelBytes::Onnx) + } + ModelFile::Bin { 
filename } => { + self.read_vvm_entry(filename).await.map(ModelBytes::Bin) + } + } + } + async fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { async { let me = self @@ -460,7 +479,7 @@ mod tests { use serde_json::json; use crate::{ - manifest::{ManifestDomains, TalkManifest}, + manifest::{ManifestDomains, ModelFile, TalkManifest}, SpeakerMeta, StyleType, }; @@ -503,9 +522,15 @@ mod tests { } static TALK_MANIFEST: LazyLock = LazyLock::new(|| TalkManifest { - predict_duration_filename: "".to_owned(), - predict_intonation_filename: "".to_owned(), - decode_filename: "".to_owned(), + predict_duration: ModelFile::Onnx { + filename: "".to_owned(), + }, + predict_intonation: ModelFile::Onnx { + filename: "".to_owned(), + }, + decode: ModelFile::Onnx { + filename: "".to_owned(), + }, style_id_to_inner_voice_id: Default::default(), }); diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs index 68b836f2f..69c8e9e26 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -7,6 +7,7 @@ use std::{ use libc::c_int; +use tracing::warn; use voicevox_core::{StyleId, VoiceModelId, __internal::interop::PerformInference as _}; use crate::{helpers::display_error, init_logger_once}; @@ -26,8 +27,20 @@ macro_rules! 
ensure_initialized { static ERROR_MESSAGE: LazyLock> = LazyLock::new(|| Mutex::new(String::new())); static ONNXRUNTIME: LazyLock<&'static voicevox_core::blocking::Onnxruntime> = LazyLock::new(|| { + let alt_onnxruntime_filename = voicevox_core::blocking::Onnxruntime::LIB_VERSIONED_FILENAME + .replace( + voicevox_core::blocking::Onnxruntime::LIB_NAME, + "onnxruntime", + ); voicevox_core::blocking::Onnxruntime::load_once() .exec() + .or_else(|err| { + warn!("{err}"); + warn!("falling back to `{alt_onnxruntime_filename}`"); + voicevox_core::blocking::Onnxruntime::load_once() + .filename(alt_onnxruntime_filename) + .exec() + }) .unwrap_or_else(|err| { display_error(&err); panic!("ONNX Runtimeをロードもしくは初期化ができなかったため、クラッシュします"); diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index f35179807..b7b3e8e74 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -64,7 +64,7 @@ fn init_logger_once() { .with_env_filter(if env::var_os(EnvFilter::DEFAULT_ENV).is_some() { EnvFilter::from_default_env() } else { - "error,voicevox_core=info,voicevox_core_c_api=info,ort=info".into() + "error,voicevox_core=info,voicevox_core_c_api=info,ort=warn".into() }) .with_timer(local_time as fn(&mut Writer<'_>) -> _) .with_ansi(ansi) diff --git a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs index 4e6d26482..7a19ca19f 100644 --- a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs +++ b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs @@ -1,5 +1,6 @@ use std::sync::LazyLock; +use const_format::concatcp; use regex::{Regex, Replacer}; use crate::assert_cdylib::Utf8Output; @@ -21,10 +22,22 @@ impl Utf8Output { ) } - pub(crate) fn mask_onnxruntime_version(self) -> Self { + pub(crate) fn mask_onnxruntime_filename(self) -> Self { self.mask_stderr( - static_regex!(regex::escape(ort::downloaded_version!())), - "{onnxruntime_version}", + static_regex!(regex::escape( + const 
{ + if cfg!(windows) { + r"onnxruntime.dll" + } else if cfg!(target_os = "linux") { + concatcp!("libonnxruntime.so.", ort::downloaded_version!()) + } else if cfg!(target_os = "macos") { + concatcp!("libonnxruntime.", ort::downloaded_version!(), ".dylib") + } else { + panic!("unsupported") + } + } + )), + "{onnxruntime_filename}", ) } diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 17ccd61f8..02f6e4f39 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -51,12 +51,14 @@ metas = ''' } ]''' stderr.windows = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +{timestamp} WARN voicevox_core::compatible_engine: ONNX Runtimeのロードまたは初期化ができませんでした +{timestamp} WARN voicevox_core::compatible_engine: falling back to `{onnxruntime_filename}` {windows-video-cards} {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' stderr.unix = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +{timestamp} WARN voicevox_core::compatible_engine: ONNX Runtimeのロードまたは初期化ができませんでした +{timestamp} WARN voicevox_core::compatible_engine: falling back to `{onnxruntime_filename}` {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' @@ -90,19 +92,15 @@ result_messages.24 = "ユーザー辞書の単語のバリデーションに失 result_messages.25 = "UUIDの変換に失敗しました" # FIXME: 26, 27, 28が抜けている result_messages.29 = "推論ライブラリのロードまたは初期化ができませんでした" -stderr = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -''' +stderr = '' [simple_tts] output."こんにちは、音声合成の世界へようこそ".wav_length = 176172 stderr.windows = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' stderr.unix = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with 
version '{onnxruntime_version}' {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' @@ -159,36 +157,30 @@ metas = ''' } ]''' stderr.windows = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' stderr.unix = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' [tts_via_audio_query] output."こんにちは、音声合成の世界へようこそ".wav_length = 176172 stderr.windows = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' stderr.unix = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' # FIXME: "user_dict_load"のはず [user_dict] stderr.windows = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' stderr.unix = ''' -{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します ''' diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs index e69ad68fd..581edece5 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs @@ -97,7 +97,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs index 7b709a83d..2d7ade9fd 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs @@ -33,7 +33,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs index a36a7d9bd..fe9d518a1 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs @@ -1,10 +1,11 @@ -use std::{collections::HashMap, ffi::CStr, mem::MaybeUninit, str, sync::LazyLock}; +use std::{collections::HashMap, env, ffi::CStr, mem::MaybeUninit, str, sync::LazyLock}; use assert_cmd::assert::AssertResult; +use const_format::concatcp; use libloading::Library; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; -use test_util::c_api::{self, CApi, VoicevoxResultCode}; +use test_util::c_api::{self, CApi, VoicevoxLoadOnnxruntimeOptions, VoicevoxResultCode}; use voicevox_core::SupportedDevices; use crate::{ @@ -29,10 +30,31 @@ impl assert_cdylib::TestCase for TestCase { let onnxruntime = { let mut onnxruntime = MaybeUninit::uninit(); - assert_ok(lib.voicevox_onnxruntime_load_once( - lib.voicevox_make_default_load_onnxruntime_options(), - onnxruntime.as_mut_ptr(), - )); + let _ = const { + if true { + 0 + } else { + panic!(); + } + }; + assert_ok( + lib.voicevox_onnxruntime_load_once( + VoicevoxLoadOnnxruntimeOptions 
{ + filename: CStr::from_bytes_with_nul( + concatcp!( + env::consts::DLL_PREFIX, + "onnxruntime", + env::consts::DLL_SUFFIX, + '\0' + ) + .as_ref(), + ) + .expect("this ends with nul") + .as_ptr(), + }, + onnxruntime.as_mut_ptr(), + ), + ); onnxruntime.assume_init() }; @@ -89,7 +111,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs index a4381f74d..cacf8b186 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs @@ -1,10 +1,19 @@ -use std::{collections::HashMap, ffi::CString, mem::MaybeUninit, sync::LazyLock}; +use std::{ + collections::HashMap, + env, + ffi::{CStr, CString}, + mem::MaybeUninit, + sync::LazyLock, +}; use assert_cmd::assert::AssertResult; +use const_format::concatcp; use libloading::Library; use serde::{Deserialize, Serialize}; use test_util::{ - c_api::{self, CApi, VoicevoxInitializeOptions, VoicevoxResultCode}, + c_api::{ + self, CApi, VoicevoxInitializeOptions, VoicevoxLoadOnnxruntimeOptions, VoicevoxResultCode, + }, OPEN_JTALK_DIC_DIR, }; @@ -38,10 +47,24 @@ impl assert_cdylib::TestCase for TestCase { let onnxruntime = { let mut onnxruntime = MaybeUninit::uninit(); - assert_ok(lib.voicevox_onnxruntime_load_once( - lib.voicevox_make_default_load_onnxruntime_options(), - onnxruntime.as_mut_ptr(), - )); + assert_ok( + lib.voicevox_onnxruntime_load_once( + VoicevoxLoadOnnxruntimeOptions { + filename: CStr::from_bytes_with_nul( + concatcp!( + env::consts::DLL_PREFIX, + "onnxruntime", + env::consts::DLL_SUFFIX, + '\0' + ) + .as_ref(), + ) + .expect("this ends with nul") + .as_ptr(), + }, + onnxruntime.as_mut_ptr(), + ), + ); 
onnxruntime.assume_init() }; @@ -105,7 +128,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs index 9ac8f4b35..12ea35938 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs @@ -1,15 +1,19 @@ use std::{ + env, ffi::{CStr, CString}, mem::MaybeUninit, sync::LazyLock, }; use assert_cmd::assert::AssertResult; +use const_format::concatcp; use libloading::Library; use serde::{Deserialize, Serialize}; use test_util::{ - c_api::{self, CApi, VoicevoxInitializeOptions, VoicevoxResultCode}, + c_api::{ + self, CApi, VoicevoxInitializeOptions, VoicevoxLoadOnnxruntimeOptions, VoicevoxResultCode, + }, OPEN_JTALK_DIC_DIR, }; @@ -30,10 +34,24 @@ impl assert_cdylib::TestCase for TestCase { let onnxruntime = { let mut onnxruntime = MaybeUninit::uninit(); - assert_ok(lib.voicevox_onnxruntime_load_once( - lib.voicevox_make_default_load_onnxruntime_options(), - onnxruntime.as_mut_ptr(), - )); + assert_ok( + lib.voicevox_onnxruntime_load_once( + VoicevoxLoadOnnxruntimeOptions { + filename: CStr::from_bytes_with_nul( + concatcp!( + env::consts::DLL_PREFIX, + "onnxruntime", + env::consts::DLL_SUFFIX, + '\0' + ) + .as_ref(), + ) + .expect("this ends with nul") + .as_ptr(), + }, + onnxruntime.as_mut_ptr(), + ), + ); onnxruntime.assume_init() }; @@ -95,7 +113,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs index 0f2ff5fc8..b0386a038 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs @@ -1,10 +1,19 @@ -use std::{collections::HashMap, ffi::CString, mem::MaybeUninit, sync::LazyLock}; +use std::{ + collections::HashMap, + env, + ffi::{CStr, CString}, + mem::MaybeUninit, + sync::LazyLock, +}; use assert_cmd::assert::AssertResult; +use const_format::concatcp; use libloading::Library; use serde::{Deserialize, Serialize}; use test_util::{ - c_api::{self, CApi, VoicevoxInitializeOptions, VoicevoxResultCode}, + c_api::{ + self, CApi, VoicevoxInitializeOptions, VoicevoxLoadOnnxruntimeOptions, VoicevoxResultCode, + }, OPEN_JTALK_DIC_DIR, }; @@ -38,10 +47,24 @@ impl assert_cdylib::TestCase for TestCase { let onnxruntime = { let mut onnxruntime = MaybeUninit::uninit(); - assert_ok(lib.voicevox_onnxruntime_load_once( - lib.voicevox_make_default_load_onnxruntime_options(), - onnxruntime.as_mut_ptr(), - )); + assert_ok( + lib.voicevox_onnxruntime_load_once( + VoicevoxLoadOnnxruntimeOptions { + filename: CStr::from_bytes_with_nul( + concatcp!( + env::consts::DLL_PREFIX, + "onnxruntime", + env::consts::DLL_SUFFIX, + '\0' + ) + .as_ref(), + ) + .expect("this ends with nul") + .as_ptr(), + }, + onnxruntime.as_mut_ptr(), + ), + ); onnxruntime.assume_init() }; @@ -117,7 +140,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs index 64e062251..9d3a0dd32 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs @@ -1,15 +1,19 @@ // ユーザー辞書の登録によって読みが変化することを確認するテスト。 // 辞書ロード前後でAudioQueryのkanaが変化するかどうかで確認する。 +use std::env; use std::ffi::{CStr, CString}; use std::mem::MaybeUninit; use std::sync::LazyLock; use assert_cmd::assert::AssertResult; +use const_format::concatcp; use cstr::cstr; use libloading::Library; use serde::{Deserialize, Serialize}; -use test_util::c_api::{self, CApi, VoicevoxInitializeOptions, VoicevoxResultCode}; +use test_util::c_api::{ + self, CApi, VoicevoxInitializeOptions, VoicevoxLoadOnnxruntimeOptions, VoicevoxResultCode, +}; use test_util::OPEN_JTALK_DIC_DIR; use crate::{ @@ -56,10 +60,24 @@ impl assert_cdylib::TestCase for TestCase { let onnxruntime = { let mut onnxruntime = MaybeUninit::uninit(); - assert_ok(lib.voicevox_onnxruntime_load_once( - lib.voicevox_make_default_load_onnxruntime_options(), - onnxruntime.as_mut_ptr(), - )); + assert_ok( + lib.voicevox_onnxruntime_load_once( + VoicevoxLoadOnnxruntimeOptions { + filename: CStr::from_bytes_with_nul( + concatcp!( + env::consts::DLL_PREFIX, + "onnxruntime", + env::consts::DLL_SUFFIX, + '\0' + ) + .as_ref(), + ) + .expect("this ends with nul") + .as_ptr(), + }, + onnxruntime.as_mut_ptr(), + ), + ); onnxruntime.assume_init() }; @@ -135,7 +153,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs index d3c37f127..c310b85d8 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs @@ -141,7 +141,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() - .mask_onnxruntime_version() + .mask_onnxruntime_filename() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java index 9a9cbe133..d957f4a0f 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java @@ -20,7 +20,7 @@ */ public class Onnxruntime extends Dll { /** ONNX Runtimeのライブラリ名。 */ - public static final String LIB_NAME = "onnxruntime"; + public static final String LIB_NAME = "voicevox_onnxruntime"; /** 推奨されるONNX Runtimeのバージョン。 */ public static final String LIB_VERSION = "1.17.3"; diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java index 9ab731cd9..3f85a5f84 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java @@ -16,7 +16,9 @@ VoiceModel loadModel() { } Onnxruntime loadOnnxruntime() { - final String FILENAME = "../../test_util/data/lib/" + Onnxruntime.LIB_VERSIONED_FILENAME; + final String FILENAME = + "../../test_util/data/lib/" + + 
Onnxruntime.LIB_VERSIONED_FILENAME.replace("voicevox_onnxruntime", "onnxruntime"); try { return Onnxruntime.loadOnce().filename(FILENAME).exec(); diff --git a/crates/voicevox_core_python_api/python/test/conftest.py b/crates/voicevox_core_python_api/python/test/conftest.py index 430e415c1..c55fcb4a4 100644 --- a/crates/voicevox_core_python_api/python/test/conftest.py +++ b/crates/voicevox_core_python_api/python/test/conftest.py @@ -13,7 +13,9 @@ / "test_util" / "data" / "lib" - / voicevox_core.blocking.Onnxruntime.LIB_VERSIONED_FILENAME + / voicevox_core.blocking.Onnxruntime.LIB_VERSIONED_FILENAME.replace( + "voicevox_onnxruntime", "onnxruntime" + ) ) open_jtalk_dic_dir = ( root_dir.parent.parent.parent / "test_util" / "data" / "open_jtalk_dic_utf_8-1.11" diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi index 7652a7d2c..6dd7a2afc 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi @@ -59,7 +59,7 @@ class Onnxruntime: # ここの定数値が本物と合致するかどうかは、test_type_stub_consts.pyで担保する。 - LIB_NAME: str = "onnxruntime" + LIB_NAME: str = "voicevox_onnxruntime" """ONNX Runtimeのライブラリ名。""" LIB_VERSION: str = "1.17.3" diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi index 602ff31bc..a78f148d9 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi @@ -59,7 +59,7 @@ class Onnxruntime: # ここの定数値が本物と合致するかどうかは、test_type_stub_consts.pyで担保する。 - LIB_NAME: str = "onnxruntime" + LIB_NAME: str = "voicevox_onnxruntime" """ONNX Runtimeのライブラリ名。""" LIB_VERSION: str = "1.17.3" diff --git a/model/sample.vvm/manifest.json b/model/sample.vvm/manifest.json index db2ca92c1..095adc082 
100644 --- a/model/sample.vvm/manifest.json +++ b/model/sample.vvm/manifest.json @@ -3,9 +3,18 @@ "id": "018fa5b1-146c-71e9-b523-6f6dabcf05fe", "metas_filename": "metas.json", "talk": { - "predict_duration_filename": "predict_duration.onnx", - "predict_intonation_filename": "predict_intonation.onnx", - "decode_filename": "decode.onnx", + "predict_duration": { + "type": "onnx", + "filename": "predict_duration.onnx" + }, + "predict_intonation": { + "type": "onnx", + "filename": "predict_intonation.onnx" + }, + "decode": { + "type": "onnx", + "filename": "decode.onnx" + }, "style_id_to_inner_voice_id": { "302": 2, "303": 3 From 6e1cafbd948e97ae3899ae00873b33ca6d477c3d Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 1 Sep 2024 04:51:41 +0900 Subject: [PATCH 2/9] =?UTF-8?q?ort=E3=82=92=E3=82=A2=E3=83=83=E3=83=97?= =?UTF-8?q?=E3=83=87=E3=83=BC=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 277938b89..86d797ecc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4212,7 +4212,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "voicevox-ort" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=16a0601123804e5b281df251c3c2461abe222cc1#16a0601123804e5b281df251c3c2461abe222cc1" +source = "git+https://github.com/qryxip/ort.git?rev=cb4acff0c978f3615a4dbaf2164d026185b88134#cb4acff0c978f3615a4dbaf2164d026185b88134" dependencies = [ "anyhow", "half", @@ -4229,7 +4229,7 @@ dependencies = [ [[package]] name = "voicevox-ort-sys" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=16a0601123804e5b281df251c3c2461abe222cc1#16a0601123804e5b281df251c3c2461abe222cc1" +source = "git+https://github.com/qryxip/ort.git?rev=cb4acff0c978f3615a4dbaf2164d026185b88134#cb4acff0c978f3615a4dbaf2164d026185b88134" 
dependencies = [ "flate2", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index e75379803..0f09c15af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,7 +92,7 @@ zip = "0.6.3" [workspace.dependencies.voicevox-ort] git = "https://github.com/qryxip/ort.git" -rev = "16a0601123804e5b281df251c3c2461abe222cc1" +rev = "cb4acff0c978f3615a4dbaf2164d026185b88134" [workspace.dependencies.open_jtalk] git = "https://github.com/VOICEVOX/open_jtalk-rs.git" From a5f009c23cc56d316a3d9126f60092d3ea897f0e Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 1 Sep 2024 04:55:55 +0900 Subject: [PATCH 3/9] =?UTF-8?q?`bin`=20=E2=86=92=20`vv-bin`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/infer/runtimes/onnxruntime.rs | 2 +- crates/voicevox_core/src/manifest.rs | 4 ++-- crates/voicevox_core/src/voice_model.rs | 10 ++++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs index 06c3dabc6..736c5b364 100644 --- a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs +++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs @@ -89,7 +89,7 @@ impl InferenceRuntime for self::blocking::Onnxruntime { let sess = match model { ModelBytes::Onnx(onnx) => builder.commit_from_memory(onnx), - ModelBytes::Bin(bin) => builder.commit_from_vv_bin(bin), + ModelBytes::VvBin(bin) => builder.commit_from_vv_bin(bin), }?; let input_param_infos = sess diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs index ae062a7c1..633fbd28a 100644 --- a/crates/voicevox_core/src/manifest.rs +++ b/crates/voicevox_core/src/manifest.rs @@ -90,10 +90,10 @@ pub(crate) struct TalkManifest { } #[derive(Deserialize, Clone)] -#[serde(tag = "type", rename_all = "lowercase")] +#[serde(tag = "type", rename_all = "kebab-case")] pub(crate) enum ModelFile { Onnx { filename: String }, - Bin { 
filename: String }, + VvBin { filename: String }, } #[serde_as] diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 4b8e4fadd..ef3152425 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -100,7 +100,7 @@ impl VoiceModelHeader { pub(crate) enum ModelBytes { Onnx(Vec), - Bin(Vec), + VvBin(Vec), } impl ManifestDomains { @@ -273,7 +273,9 @@ pub(crate) mod blocking { fn read_model_bytes(&self, entry: &ModelFile) -> LoadModelResult { match entry { ModelFile::Onnx { filename } => self.read_vvm_entry(filename).map(ModelBytes::Onnx), - ModelFile::Bin { filename } => self.read_vvm_entry(filename).map(ModelBytes::Bin), + ModelFile::VvBin { filename } => { + self.read_vvm_entry(filename).map(ModelBytes::VvBin) + } } } @@ -442,8 +444,8 @@ pub(crate) mod tokio { ModelFile::Onnx { filename } => { self.read_vvm_entry(filename).await.map(ModelBytes::Onnx) } - ModelFile::Bin { filename } => { - self.read_vvm_entry(filename).await.map(ModelBytes::Bin) + ModelFile::VvBin { filename } => { + self.read_vvm_entry(filename).await.map(ModelBytes::VvBin) } } } From 92fb4c9552198535d40add7621b1b98e8c99fc2a Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 1 Sep 2024 05:24:37 +0900 Subject: [PATCH 4/9] =?UTF-8?q?ort=E3=82=92=E3=82=A2=E3=83=83=E3=83=97?= =?UTF-8?q?=E3=83=87=E3=83=BC=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 86d797ecc..ac5883390 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4212,7 +4212,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "voicevox-ort" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=cb4acff0c978f3615a4dbaf2164d026185b88134#cb4acff0c978f3615a4dbaf2164d026185b88134" +source = 
"git+https://github.com/qryxip/ort.git?rev=5b5b22a135bd5257b31df9156d1cb6a51fbf92f0#5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" dependencies = [ "anyhow", "half", @@ -4229,7 +4229,7 @@ dependencies = [ [[package]] name = "voicevox-ort-sys" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=cb4acff0c978f3615a4dbaf2164d026185b88134#cb4acff0c978f3615a4dbaf2164d026185b88134" +source = "git+https://github.com/qryxip/ort.git?rev=5b5b22a135bd5257b31df9156d1cb6a51fbf92f0#5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" dependencies = [ "flate2", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 0f09c15af..a89d96834 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,7 +92,7 @@ zip = "0.6.3" [workspace.dependencies.voicevox-ort] git = "https://github.com/qryxip/ort.git" -rev = "cb4acff0c978f3615a4dbaf2164d026185b88134" +rev = "5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" [workspace.dependencies.open_jtalk] git = "https://github.com/VOICEVOX/open_jtalk-rs.git" From 38687d5cbbdfecdc404ec7e9268191961714ae87 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Mon, 2 Sep 2024 19:08:36 +0900 Subject: [PATCH 5/9] =?UTF-8?q?ort=E3=82=92=E3=82=A2=E3=83=83=E3=83=97?= =?UTF-8?q?=E3=83=87=E3=83=BC=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 4 ++-- Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac5883390..afeab8e33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4212,7 +4212,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "voicevox-ort" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=5b5b22a135bd5257b31df9156d1cb6a51fbf92f0#5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" +source = "git+https://github.com/VOICEVOX/ort.git?rev=3ecf05d66e2e04435fde3c8200e5208ce2707eb7#3ecf05d66e2e04435fde3c8200e5208ce2707eb7" dependencies = [ "anyhow", "half", @@ -4229,7 +4229,7 @@ 
dependencies = [ [[package]] name = "voicevox-ort-sys" version = "2.0.0-rc.4" -source = "git+https://github.com/qryxip/ort.git?rev=5b5b22a135bd5257b31df9156d1cb6a51fbf92f0#5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" +source = "git+https://github.com/VOICEVOX/ort.git?rev=3ecf05d66e2e04435fde3c8200e5208ce2707eb7#3ecf05d66e2e04435fde3c8200e5208ce2707eb7" dependencies = [ "flate2", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index a89d96834..2bce357c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,8 +91,8 @@ windows = "0.43.0" zip = "0.6.3" [workspace.dependencies.voicevox-ort] -git = "https://github.com/qryxip/ort.git" -rev = "5b5b22a135bd5257b31df9156d1cb6a51fbf92f0" +git = "https://github.com/VOICEVOX/ort.git" +rev = "3ecf05d66e2e04435fde3c8200e5208ce2707eb7" [workspace.dependencies.open_jtalk] git = "https://github.com/VOICEVOX/open_jtalk-rs.git" From ff70980d437666d2ff29cd70b44c85c752875142 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Mon, 2 Sep 2024 19:30:45 +0900 Subject: [PATCH 6/9] =?UTF-8?q?`"type":=20"vv-bin"`=20=E2=86=92=20`"type":?= =?UTF-8?q?=20"vv=5Fbin"`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/VOICEVOX/voicevox_core/pull/825#discussion_r1740128107 --- crates/voicevox_core/src/manifest.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs index 633fbd28a..f5a923658 100644 --- a/crates/voicevox_core/src/manifest.rs +++ b/crates/voicevox_core/src/manifest.rs @@ -90,7 +90,7 @@ pub(crate) struct TalkManifest { } #[derive(Deserialize, Clone)] -#[serde(tag = "type", rename_all = "kebab-case")] +#[serde(tag = "type", rename_all = "snake_case")] pub(crate) enum ModelFile { Onnx { filename: String }, VvBin { filename: String }, From b87afd8e910e76ed96c4863aa2cd2ba8c48e7ae4 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Mon, 2 Sep 2024 23:03:23 +0900 Subject: [PATCH 7/9] 
=?UTF-8?q?`inputs.is=5Fproduction`=E3=82=92=E6=AE=8B?= =?UTF-8?q?=E3=81=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/VOICEVOX/voicevox_core/pull/825#discussion_r1740958377 --- .github/workflows/build_and_deploy.yml | 35 ++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 4f55d14b5..9f0483d8b 100644 --- a/.github/workflows/build_and_deploy.yml +++ b/.github/workflows/build_and_deploy.yml @@ -16,6 +16,13 @@ on: type: boolean required: false default: false + # TODO: ライセンス表記およびモデル配布形体の方針が固まったら廃止 + # + is_production: + description: "製品版をビルドする" + type: boolean + required: false + default: false release: types: - published @@ -186,6 +193,18 @@ jobs: run: | echo "$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin" >> "$GITHUB_PATH" echo "AR_${{ matrix.target }}=llvm-ar" >> "$GITHUB_ENV" + - name: Checkout VOICEVOX RESOURCE + if: inputs.is_production + uses: actions/checkout@v4 + with: + repository: VOICEVOX/voicevox_resource + ref: ${{ env.VOICEVOX_RESOURCE_VERSION }} + path: download/resource + - name: Replace resource + if: inputs.is_production + shell: bash + run: + mv -f download/resource/core/README.md ./README.md - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall - name: Install cargo-edit @@ -322,13 +341,25 @@ jobs: ${{ env.ASSET_NAME }}.zip target_commitish: ${{ github.sha }} - deploy_sample_model: + deploy_model: runs-on: ubuntu-latest needs: config env: - ASSET_NAME: sample-model-${{ needs.config.outputs.version }} + ASSET_NAME: model-${{ needs.config.outputs.version }} steps: - uses: actions/checkout@v4 + - name: Checkout VOICEVOX FAT RESOURCE + if: inputs.is_production + uses: actions/checkout@v4 + with: + repository: VOICEVOX/voicevox_fat_resource + ref: ${{ env.VOICEVOX_FAT_RESOURCE_VERSION }} + path: download/fat_resource + - name: Replace 
resource + if: inputs.is_production + shell: bash + run: + rm -r ./model; mv download/fat_resource/core/model ./model - name: Create artifact run: | mkdir "artifact" From 80345e5f8ff6f788615f4d017b0c77449fc6cfab Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Wed, 4 Sep 2024 18:10:40 +0900 Subject: [PATCH 8/9] =?UTF-8?q?=E5=98=98=E3=82=B3=E3=83=A1=E3=83=B3?= =?UTF-8?q?=E3=83=88=E3=82=92=E3=83=AA=E3=83=90=E3=83=BC=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index 8377f2c4e..fedf538cf 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -10,7 +10,7 @@ //! 開きます。[CUDA]と[DirectML]が利用できます。 //! - **`link-onnxruntime`**: ONNX Runtimeをロード時動的リンクします。iOSのような`dlopen`の利用が //! 困難な環境でのみこちらを利用するべきです。_Note_: -//! [動的リンク対象のライブラリ名]は`voicevox_onnxruntime`で固定です。変更 +//! [動的リンク対象のライブラリ名]は`onnxruntime`で固定です。変更 //! は`patchelf(1)`や`install_name_tool(1)`で行ってください。また、[ONNX RuntimeのGPU機能]を使う //! ことはできません。 //! 
From 148fa5d6ab5f60e578e6355d2a2d17d28e2e68dd Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Wed, 4 Sep 2024 18:11:46 +0900 Subject: [PATCH 9/9] =?UTF-8?q?`install=5Fname=5Ftool`=E3=81=A7=E3=81=AErp?= =?UTF-8?q?ath=E5=A4=89=E6=9B=B4=E3=82=92voicevox=5Fonnxruntime.framework?= =?UTF-8?q?=E5=AE=9B=E3=81=A6=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build_util/make_ios_xcframework.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_util/make_ios_xcframework.bash b/build_util/make_ios_xcframework.bash index 2d35fac88..e2608955e 100755 --- a/build_util/make_ios_xcframework.bash +++ b/build_util/make_ios_xcframework.bash @@ -65,9 +65,9 @@ for arch in "${arches[@]}"; do install_name_tool -id "@rpath/voicevox_core.framework/voicevox_core" \ "Framework-${arch}/voicevox_core.framework/voicevox_core" - # 依存ライブラリonnxruntimeへの@rpathを変更 + # onnxruntimeへの@rpathを、voicevox_onnxruntimeのXCFrameworkに変更 install_name_tool -change "@rpath/$dylib_string" \ - "@rpath/onnxruntime.framework/onnxruntime" \ + "@rpath/voicevox_onnxruntime.framework/voicevox_onnxruntime" \ "Framework-${arch}/voicevox_core.framework/voicevox_core" done