From a8131d911cfd69347b5758e19a2bbd0daabe32aa Mon Sep 17 00:00:00 2001
From: Ryo Yamashita
Date: Sun, 3 Nov 2024 00:49:53 +0900
Subject: [PATCH] refactor: reshape `Synthesizer`'s implementation into
 `Inner<_, A: Async>` (#865)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Of the TODOs left behind in #831, this resolves only the following one,
which concerns `Synthesizer`:

````rs
// TODO: `VoiceModelFile`のように、次のような設計にする。
//
// ```
// pub(crate) mod blocking {
//     pub struct Synthesizer(Inner);
//     // …
// }
// pub(crate) mod nonblocking {
//     pub struct Synthesizer(Inner);
//     // …
// }
// ```
````

The purpose is to make it smoother to do things like #687. Performance
might be affected, but the impact should not be that large. A minimal
illustrative sketch of the resulting pattern is appended after the diff.

Refs: #831, #687
---
 crates/voicevox_core/src/status.rs         |    8 +-
 crates/voicevox_core/src/synthesizer.rs    | 1047 +++++++++++------
 crates/voicevox_core/src/voice_model.rs    |   48 +-
 .../tests/e2e/snapshots.toml               |   20 +-
 crates/voicevox_core_python_api/src/lib.rs |    6 +-
 5 files changed, 716 insertions(+), 413 deletions(-)

diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index 3234c27c4..64986f627 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -415,8 +415,8 @@ mod tests { }, ); let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); - let model_contents = &model.read_inference_models().await.unwrap(); - let result = status.insert_model(model.header(), model_contents); + let model_contents = &model.inner().read_inference_models().await.unwrap(); + let result = status.insert_model(model.inner().header(), model_contents); assert_debug_fmt_eq!(Ok(()), result); assert_eq!(1, status.loaded_models.lock().unwrap().0.len()); } @@ -431,8 +431,8 @@ mod tests { }, ); let vvm = &crate::nonblocking::VoiceModelFile::sample().await.unwrap(); - let model_header = vvm.header(); - let model_contents = &vvm.read_inference_models().await.unwrap(); + let model_header = vvm.inner().header(); + let model_contents = &vvm.inner().read_inference_models().await.unwrap(); assert!( !status.is_loaded_model(model_header.manifest.id), "model should not be loaded" diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index adf73cb68..b30d7c3ca 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -1,15 +1,4 @@ -// TODO: `VoiceModelFile`のように、次のような設計にする。 -// -// ``` -// pub(crate) mod blocking { -// pub struct Synthesizer(Inner); -// // … -// } -// pub(crate) mod nonblocking { -// pub struct Synthesizer(Inner); -// // … -// } -// ``` +use crate::asyncs::{Async, BlockingThreadPool, SingleTasked}; /// [`blocking::Synthesizer::synthesis`]および[`nonblocking::Synthesizer::synthesis`]のオプション。 /// @@ -79,12 +68,44 @@ pub struct InitializeOptions { pub cpu_num_threads: u16, } -pub(crate) mod blocking { +trait AsyncForOnnxruntime: Async { + async fn unblock(f: F) -> T + where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static; +} + +impl AsyncForOnnxruntime for SingleTasked { + async fn unblock(f: F) -> T + where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, + { + f() + } +} + +impl AsyncForOnnxruntime for BlockingThreadPool { + async fn unblock(f: F) -> T + where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, + { + ::blocking::unblock(f).await + } +} + +mod inner { use enum_map::enum_map; - use std::io::{Cursor, Write as _}; + use std::{ + io::{Cursor, Write as _},
marker::PhantomData, + sync::Arc, + }; use tracing::info; use crate::{ + asyncs::{BlockingThreadPool, SingleTasked}, devices::{DeviceSpec, GpuSpec}, engine::{create_kana, mora_to_text, wav_from_s16le, Mora, OjtPhoneme}, error::ErrorRepr, @@ -95,15 +116,15 @@ pub(crate) mod blocking { PredictIntonationOutput, RenderAudioSegmentInput, RenderAudioSegmentOutput, TalkDomain, TalkOperation, }, - InferenceRuntime as _, InferenceSessionOptions, + InferenceRuntime, InferenceSessionOptions, }, status::Status, text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer}, - AccentPhrase, AudioQuery, FullcontextExtractor, Result, StyleId, SynthesisOptions, - VoiceModelId, VoiceModelMeta, + voice_model, AccentPhrase, AudioQuery, FullcontextExtractor, Result, StyleId, + SynthesisOptions, VoiceModelId, VoiceModelMeta, }; - use super::{AccelerationMode, InitializeOptions, TtsOptions}; + use super::{AccelerationMode, AsyncForOnnxruntime, InitializeOptions, TtsOptions}; const DEFAULT_SAMPLING_RATE: u32 = 24000; @@ -123,52 +144,28 @@ pub(crate) mod blocking { audio_query: AudioQuery, } - /// 音声シンセサイザ。 - pub struct Synthesizer { - pub(super) status: Status, + pub struct Inner { + status: Arc>, open_jtalk_analyzer: OpenJTalkAnalyzer, kana_analyzer: KanaAnalyzer, use_gpu: bool, + _marker: PhantomData A>, } - impl self::Synthesizer { - /// `Synthesizer`をコンストラクトする。 - /// - /// # Example - /// - #[cfg_attr(feature = "load-onnxruntime", doc = "```")] - #[cfg_attr(not(feature = "load-onnxruntime"), doc = "```compile_fail")] - /// # fn main() -> anyhow::Result<()> { - /// # use test_util::{ONNXRUNTIME_DYLIB_PATH, OPEN_JTALK_DIC_DIR}; - /// # - /// # const ACCELERATION_MODE: AccelerationMode = AccelerationMode::Cpu; - /// # - /// use std::sync::Arc; - /// - /// use voicevox_core::{ - /// blocking::{Onnxruntime, OpenJtalk, Synthesizer}, - /// AccelerationMode, InitializeOptions, - /// }; - /// - /// # if cfg!(windows) { - /// # // Windows\System32\onnxruntime.dllを回避 - /// # voicevox_core::blocking::Onnxruntime::load_once() - /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) - /// # .exec()?; - /// # } - /// let mut syntesizer = Synthesizer::new( - /// Onnxruntime::load_once().exec()?, - /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).unwrap()), - /// &InitializeOptions { - /// acceleration_mode: ACCELERATION_MODE, - /// ..Default::default() - /// }, - /// )?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn new( + impl From> for Inner { + fn from(from: Inner) -> Self { + Self { + status: from.status, + open_jtalk_analyzer: from.open_jtalk_analyzer, + kana_analyzer: KanaAnalyzer, + use_gpu: from.use_gpu, + _marker: PhantomData, + } + } + } + + impl Inner { + pub(super) fn new( onnxruntime: &'static crate::blocking::Onnxruntime, open_jtalk: O, options: &InitializeOptions, @@ -225,7 +222,8 @@ pub(crate) mod blocking { TalkOperation::RenderAudioSegment => heavy_session_options, }, }, - ); + ) + .into(); let use_gpu = matches!(device_for_heavy, DeviceSpec::Gpu(_)); @@ -234,46 +232,44 @@ pub(crate) mod blocking { open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk), kana_analyzer: KanaAnalyzer, use_gpu, + _marker: PhantomData, }) } - pub fn onnxruntime(&self) -> &'static crate::blocking::Onnxruntime { + pub(super) fn onnxruntime(&self) -> &'static crate::blocking::Onnxruntime { self.status.rt } - /// ハードウェアアクセラレーションがGPUモードか判定する。 - pub fn is_gpu_mode(&self) -> bool { + pub(super) fn is_gpu_mode(&self) -> bool { self.use_gpu } - /// 音声モデルを読み込む。 - pub fn load_voice_model(&self, model: &crate::blocking::VoiceModelFile) 
-> Result<()> { - let model_bytes = &model.read_inference_models()?; + pub(super) async fn load_voice_model( + &self, + model: &voice_model::Inner, + ) -> crate::Result<()> { + let model_bytes = &model.read_inference_models().await?; + // TODO: 重い操作なので、asyncにする self.status.insert_model(model.header(), model_bytes) } - /// 音声モデルの読み込みを解除する。 - pub fn unload_voice_model(&self, voice_model_id: VoiceModelId) -> Result<()> { + pub(super) fn unload_voice_model(&self, voice_model_id: VoiceModelId) -> Result<()> { self.status.unload_model(voice_model_id) } - /// 指定したIDの音声モデルが読み込まれているか判定する。 - pub fn is_loaded_voice_model(&self, voice_model_id: VoiceModelId) -> bool { + pub(super) fn is_loaded_voice_model(&self, voice_model_id: VoiceModelId) -> bool { self.status.is_loaded_model(voice_model_id) } - #[doc(hidden)] - pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + pub(super) fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { self.status.is_loaded_model_by_style_id(style_id) } - /// 今読み込んでいる音声モデルのメタ情報を返す。 - pub fn metas(&self) -> VoiceModelMeta { + pub(super) fn metas(&self) -> VoiceModelMeta { self.status.metas() } - /// AudioQueryから音声合成用の中間表現を生成する。 - pub fn precompute_render( + pub(super) async fn precompute_render( &self, audio_query: &AudioQuery, style_id: StyleId, @@ -393,13 +389,15 @@ pub(crate) mod blocking { padding_size, ); - let spec = self.generate_full_intermediate( - f0_with_padding.len(), - OjtPhoneme::num_phoneme(), - &f0_with_padding, - &phoneme_with_padding, - style_id, - )?; + let spec = self + .generate_full_intermediate( + f0_with_padding.len(), + OjtPhoneme::num_phoneme(), + &f0_with_padding, + &phoneme_with_padding, + style_id, + ) + .await?; return Ok(AudioFeature { internal_state: spec, style_id, @@ -499,8 +497,12 @@ pub(crate) mod blocking { } } - /// 中間表現から16bit PCMで音声波形を生成する。 - pub fn render(&self, audio: &AudioFeature, start: usize, end: usize) -> Result> { + pub(super) async fn render( + &self, + audio: &AudioFeature, + start: usize, + end: usize, + ) -> Result> { // TODO: 44.1kHzなどの対応 const MARGIN: usize = 14; // 使われているHifiGANのreceptive fieldから計算される安全マージン use std::cmp::min; @@ -526,8 +528,9 @@ pub(crate) mod blocking { let segment = audio .internal_state .slice(ndarray::s![slice_start..slice_end, ..]); - let wave_with_margin = - self.render_audio_segment(segment.into_owned(), audio.style_id)?; + let wave_with_margin = self + .render_audio_segment(segment.into_owned(), audio.style_id) + .await?; // 変換前に追加した安全マージンを生成音声から取り除く let wave = wave_with_margin .slice(ndarray::s![ @@ -565,15 +568,16 @@ pub(crate) mod blocking { } } - /// AudioQueryから直接WAVフォーマットで音声波形を生成する。 - pub fn synthesis( + pub(super) async fn synthesis( &self, audio_query: &AudioQuery, style_id: StyleId, options: &SynthesisOptions, ) -> Result> { - let audio = self.precompute_render(audio_query, style_id, options)?; - let pcm = self.render(&audio, 0, audio.frame_length)?; + let audio = self + .precompute_render(audio_query, style_id, options) + .await?; + let pcm = self.render(&audio, 0, audio.frame_length).await?; Ok(wav_from_s16le( &pcm, audio_query.output_sampling_rate, @@ -581,53 +585,27 @@ pub(crate) mod blocking { )) } - /// AquesTalk風記法からAccentPhrase (アクセント句)の配列を生成する。 - /// - /// # Example - /// - /// ``` - /// # fn main() -> anyhow::Result<()> { - /// # use pollster::FutureExt as _; - /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; - /// # - /// # let synthesizer = - /// # 
voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, - /// # test_util::ONNXRUNTIME_DYLIB_PATH, - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .block_on()? - /// # .into_blocking(); - /// # - /// use voicevox_core::StyleId; - /// - /// let accent_phrases = synthesizer - /// .create_accent_phrases_from_kana("コンニチワ'", StyleId::new(302))?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn create_accent_phrases_from_kana( + pub(super) async fn create_accent_phrases_from_kana( &self, kana: &str, style_id: StyleId, ) -> Result> { let accent_phrases = self.kana_analyzer.analyze(kana)?; - self.replace_mora_data(&accent_phrases, style_id) + self.replace_mora_data(&accent_phrases, style_id).await } - /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 - pub fn replace_mora_data( + pub(super) async fn replace_mora_data( &self, accent_phrases: &[AccentPhrase], style_id: StyleId, ) -> Result> { - let accent_phrases = self.replace_phoneme_length(accent_phrases, style_id)?; - self.replace_mora_pitch(&accent_phrases, style_id) + let accent_phrases = self + .replace_phoneme_length(accent_phrases, style_id) + .await?; + self.replace_mora_pitch(&accent_phrases, style_id).await } - /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 - pub fn replace_phoneme_length( + pub(super) async fn replace_phoneme_length( &self, accent_phrases: &[AccentPhrase], style_id: StyleId, @@ -640,7 +618,7 @@ pub(crate) mod blocking { .iter() .map(|phoneme_data| phoneme_data.phoneme_id()) .collect(); - let phoneme_length = self.predict_duration(&phoneme_list_s, style_id)?; + let phoneme_length = self.predict_duration(&phoneme_list_s, style_id).await?; let mut index = 0; let new_accent_phrases = accent_phrases @@ -677,8 +655,7 @@ pub(crate) mod blocking { Ok(new_accent_phrases) } - /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 - pub fn replace_mora_pitch( + pub(super) async fn replace_mora_pitch( &self, accent_phrases: &[AccentPhrase], style_id: StyleId, @@ -727,16 +704,18 @@ pub(crate) mod blocking { end_accent_phrase_list.push(base_end_accent_phrase_list[vowel_index as usize]); } - let mut f0_list = self.predict_intonation( - vowel_phoneme_list.len(), - &vowel_phoneme_list, - &consonant_phoneme_list, - &start_accent_list, - &end_accent_list, - &start_accent_phrase_list, - &end_accent_phrase_list, - style_id, - )?; + let mut f0_list = self + .predict_intonation( + vowel_phoneme_list.len(), + &vowel_phoneme_list, + &consonant_phoneme_list, + &start_accent_list, + &end_accent_list, + &start_accent_phrase_list, + &end_accent_phrase_list, + style_id, + ) + .await?; for i in 0..vowel_phoneme_data_list.len() { const UNVOICED_MORA_PHONEME_LIST: &[&str] = &["A", "I", "U", "E", "O", "cl", "pau"]; @@ -802,143 +781,75 @@ pub(crate) mod blocking { } } - /// AquesTalk風記法から[AudioQuery]を生成する。 - /// - /// # Example - /// - /// ``` - /// # fn main() -> anyhow::Result<()> { - /// # use pollster::FutureExt as _; - /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; - /// # - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, - /// # test_util::ONNXRUNTIME_DYLIB_PATH, - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .block_on()? 
- /// # .into_blocking(); - /// # - /// use voicevox_core::StyleId; - /// - /// let audio_query = synthesizer.audio_query_from_kana("コンニチワ'", StyleId::new(302))?; - /// # - /// # Ok(()) - /// # } - /// ``` - /// - /// [AudioQuery]: crate::AudioQuery - pub fn audio_query_from_kana(&self, kana: &str, style_id: StyleId) -> Result { - let accent_phrases = self.create_accent_phrases_from_kana(kana, style_id)?; + pub(super) async fn audio_query_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> Result { + let accent_phrases = self.create_accent_phrases_from_kana(kana, style_id).await?; Ok(AudioQuery::from_accent_phrases(accent_phrases).with_kana(Some(kana.to_owned()))) } - /// AquesTalk風記法から音声合成を行う。 - pub fn tts_from_kana( + pub(super) async fn tts_from_kana( &self, kana: &str, style_id: StyleId, options: &TtsOptions, ) -> Result> { - let audio_query = &self.audio_query_from_kana(kana, style_id)?; + let audio_query = &self.audio_query_from_kana(kana, style_id).await?; self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) + .await } } - impl self::Synthesizer { - /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 - /// - /// # Example - /// - /// ``` - /// # fn main() -> anyhow::Result<()> { - /// # use pollster::FutureExt as _; - /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; - /// # - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, - /// # test_util::ONNXRUNTIME_DYLIB_PATH, - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .block_on()? - /// # .into_blocking(); - /// # - /// use voicevox_core::StyleId; - /// - /// let accent_phrases = synthesizer.create_accent_phrases("こんにちは", StyleId::new(302))?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn create_accent_phrases( + impl Inner { + pub(super) async fn create_accent_phrases( &self, text: &str, style_id: StyleId, ) -> Result> { let accent_phrases = self.open_jtalk_analyzer.analyze(text)?; - self.replace_mora_data(&accent_phrases, style_id) + self.replace_mora_data(&accent_phrases, style_id).await } - /// 日本語のテキストから[AudioQuery]を生成する。 - /// - /// # Examples - /// - /// ``` - /// # fn main() -> anyhow::Result<()> { - /// # use pollster::FutureExt as _; - /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; - /// # - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, - /// # test_util::ONNXRUNTIME_DYLIB_PATH, - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .block_on()? 
- /// # .into_blocking(); - /// # - /// use voicevox_core::StyleId; - /// - /// let audio_query = synthesizer.audio_query("こんにちは", StyleId::new(302))?; - /// # - /// # Ok(()) - /// # } - /// ``` - /// - /// [AudioQuery]: crate::AudioQuery - pub fn audio_query(&self, text: &str, style_id: StyleId) -> Result { - let accent_phrases = self.create_accent_phrases(text, style_id)?; + pub(super) async fn audio_query( + &self, + text: &str, + style_id: StyleId, + ) -> Result { + let accent_phrases = self.create_accent_phrases(text, style_id).await?; Ok(AudioQuery::from_accent_phrases(accent_phrases)) } - /// 日本語のテキストから音声合成を行う。 - pub fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { - let audio_query = &self.audio_query(text, style_id)?; + pub(super) async fn tts( + &self, + text: &str, + style_id: StyleId, + options: &TtsOptions, + ) -> Result> { + let audio_query = &self.audio_query(text, style_id).await?; self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) + .await } } - pub trait PerformInference { - /// `predict_duration`を実行する。 - /// - /// # Performance - /// - /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result>; + impl Inner { + pub(super) async fn predict_duration( + &self, + phoneme_vector: &[i64], + style_id: StyleId, + ) -> Result> { + let status = self.status.clone(); + let phoneme_vector = ndarray::arr1(phoneme_vector); + A::unblock(move || status.predict_duration(phoneme_vector, style_id)).await + } - /// `predict_intonation`を実行する。 - /// - /// # Performance - /// - /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 #[expect( clippy::too_many_arguments, reason = "compatible_engineでの`predict_intonation`の形を考えると、ここの引数を構造体に\ まとめたりしても可読性に寄与しない" )] - fn predict_intonation( + pub(super) async fn predict_intonation( &self, length: usize, vowel_phoneme_vector: &[i64], @@ -948,48 +859,92 @@ pub(crate) mod blocking { start_accent_phrase_vector: &[i64], end_accent_phrase_vector: &[i64], style_id: StyleId, - ) -> Result>; + ) -> Result> { + let status = self.status.clone(); + let vowel_phoneme_vector = ndarray::arr1(vowel_phoneme_vector); + let consonant_phoneme_vector = ndarray::arr1(consonant_phoneme_vector); + let start_accent_vector = ndarray::arr1(start_accent_vector); + let end_accent_vector = ndarray::arr1(end_accent_vector); + let start_accent_phrase_vector = ndarray::arr1(start_accent_phrase_vector); + let end_accent_phrase_vector = ndarray::arr1(end_accent_phrase_vector); + A::unblock(move || { + status.predict_intonation( + length, + vowel_phoneme_vector, + consonant_phoneme_vector, + start_accent_vector, + end_accent_vector, + start_accent_phrase_vector, + end_accent_phrase_vector, + style_id, + ) + }) + .await + } - fn generate_full_intermediate( + pub(super) async fn generate_full_intermediate( &self, length: usize, phoneme_size: usize, f0: &[f32], phoneme_vector: &[f32], style_id: StyleId, - ) -> Result>; + ) -> Result> { + let status = self.status.clone(); + let f0 = ndarray::arr1(f0); + let phoneme_vector = ndarray::arr1(phoneme_vector); + A::unblock(move || { + status.generate_full_intermediate( + length, + phoneme_size, + f0, + phoneme_vector, + style_id, + ) + }) + .await + } - fn render_audio_segment( + pub(super) async fn render_audio_segment( &self, spec: ndarray::Array2, style_id: StyleId, - ) -> Result>; + ) -> Result> { + let status = self.status.clone(); + A::unblock(move || status.render_audio_segment(spec, style_id)).await + } - /// `decode`を実行する。 
- /// - /// # Performance - /// - /// CPU/GPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - fn decode( + pub(super) async fn decode( &self, length: usize, phoneme_size: usize, f0: &[f32], phoneme_vector: &[f32], style_id: StyleId, - ) -> Result>; + ) -> Result> { + let status = self.status.clone(); + let f0 = ndarray::arr1(f0); + let phoneme_vector = ndarray::arr1(phoneme_vector); + A::unblock(move || status.decode(length, phoneme_size, f0, phoneme_vector, style_id)) + .await + } } - impl PerformInference for self::Synthesizer { - fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result> { - let (model_id, inner_voice_id) = self.status.ids_for::(style_id)?; + impl Status { + /// CPU-boundな操作なので、非同期ランタイム上では直接実行されるべきではない。 + fn predict_duration( + &self, + phoneme_vector: ndarray::Array1, + style_id: StyleId, + ) -> Result> { + let (model_id, inner_voice_id) = self.ids_for::(style_id)?; let PredictDurationOutput { phoneme_length: output, - } = self.status.run_session( + } = self.run_session( model_id, PredictDurationInput { - phoneme_list: ndarray::arr1(phoneme_vector), + phoneme_list: phoneme_vector, speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]), }, )?; @@ -1006,29 +961,35 @@ pub(crate) mod blocking { const PHONEME_LENGTH_MINIMAL: f32 = 0.01; } + /// CPU-boundな操作なので、非同期ランタイム上では直接実行されるべきではない。 + #[expect( + clippy::too_many_arguments, + reason = "compatible_engineでの`predict_intonation`の形を考えると、ここの引数を構造体に\ + まとめたりしても可読性に寄与しない" + )] fn predict_intonation( &self, length: usize, - vowel_phoneme_vector: &[i64], - consonant_phoneme_vector: &[i64], - start_accent_vector: &[i64], - end_accent_vector: &[i64], - start_accent_phrase_vector: &[i64], - end_accent_phrase_vector: &[i64], + vowel_phoneme_vector: ndarray::Array1, + consonant_phoneme_vector: ndarray::Array1, + start_accent_vector: ndarray::Array1, + end_accent_vector: ndarray::Array1, + start_accent_phrase_vector: ndarray::Array1, + end_accent_phrase_vector: ndarray::Array1, style_id: StyleId, ) -> Result> { - let (model_id, inner_voice_id) = self.status.ids_for::(style_id)?; + let (model_id, inner_voice_id) = self.ids_for::(style_id)?; - let PredictIntonationOutput { f0_list: output } = self.status.run_session( + let PredictIntonationOutput { f0_list: output } = self.run_session( model_id, PredictIntonationInput { length: ndarray::arr0(length as i64), - vowel_phoneme_list: ndarray::arr1(vowel_phoneme_vector), - consonant_phoneme_list: ndarray::arr1(consonant_phoneme_vector), - start_accent_list: ndarray::arr1(start_accent_vector), - end_accent_list: ndarray::arr1(end_accent_vector), - start_accent_phrase_list: ndarray::arr1(start_accent_phrase_vector), - end_accent_phrase_list: ndarray::arr1(end_accent_phrase_vector), + vowel_phoneme_list: vowel_phoneme_vector, + consonant_phoneme_list: consonant_phoneme_vector, + start_accent_list: start_accent_vector, + end_accent_list: end_accent_vector, + start_accent_phrase_list: start_accent_phrase_vector, + end_accent_phrase_list: end_accent_phrase_vector, speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]), }, )?; @@ -1036,47 +997,47 @@ pub(crate) mod blocking { Ok(output.into_raw_vec()) } + /// CPU-boundな操作なので、非同期ランタイム上では直接実行されるべきではない。 fn generate_full_intermediate( &self, length: usize, phoneme_size: usize, - f0: &[f32], - phoneme_vector: &[f32], + f0: ndarray::Array1, + phoneme_vector: ndarray::Array1, style_id: StyleId, ) -> Result> { - let (model_id, inner_voice_id) = self.status.ids_for::(style_id)?; + let (model_id, inner_voice_id) = 
self.ids_for::(style_id)?; - let GenerateFullIntermediateOutput { spec } = self.status.run_session( + let GenerateFullIntermediateOutput { spec } = self.run_session( model_id, GenerateFullIntermediateInput { - f0: ndarray::arr1(f0).into_shape([length, 1]).unwrap(), - phoneme: ndarray::arr1(phoneme_vector) - .into_shape([length, phoneme_size]) - .unwrap(), + f0: f0.into_shape([length, 1]).unwrap(), + phoneme: phoneme_vector.into_shape([length, phoneme_size]).unwrap(), speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]), }, )?; Ok(spec) } + /// CPU/GPU-boundな操作なので、非同期ランタイム上では直接実行されるべきではない。 fn render_audio_segment( &self, spec: ndarray::Array2, style_id: StyleId, ) -> Result> { - let (model_id, _inner_voice_id) = self.status.ids_for::(style_id)?; - let RenderAudioSegmentOutput { wave } = self - .status - .run_session(model_id, RenderAudioSegmentInput { spec })?; + let (model_id, _inner_voice_id) = self.ids_for::(style_id)?; + let RenderAudioSegmentOutput { wave } = + self.run_session(model_id, RenderAudioSegmentInput { spec })?; Ok(wave) } + /// CPU/GPU-boundな操作なので、非同期ランタイム上では直接実行されるべきではない。 fn decode( &self, length: usize, phoneme_size: usize, - f0: &[f32], - phoneme_vector: &[f32], + f0: ndarray::Array1, + phoneme_vector: ndarray::Array1, style_id: StyleId, ) -> Result> { let intermediate = self.generate_full_intermediate( @@ -1228,17 +1189,411 @@ pub(crate) mod blocking { } } -pub(crate) mod nonblocking { - use std::sync::Arc; +#[expect( + clippy::too_many_arguments, + reason = "`PerformInference::predict_intonation`用。compatible_engineでの`predict_intonation`の\ + 形を考えると、ここの引数を構造体にまとめたりしても可読性に寄与しない" +)] +pub(crate) mod blocking { + use easy_ext::ext; + + use crate::{ + asyncs::SingleTasked, future::FutureExt as _, AccentPhrase, AudioQuery, + FullcontextExtractor, StyleId, VoiceModelId, VoiceModelMeta, + }; + + use super::{inner::Inner, InitializeOptions, SynthesisOptions, TtsOptions}; + + pub use super::inner::AudioFeature; + + /// 音声シンセサイザ。 + pub struct Synthesizer(pub(super) Inner); + + impl self::Synthesizer { + /// `Synthesizer`をコンストラクトする。 + /// + /// # Example + /// + #[cfg_attr(feature = "load-onnxruntime", doc = "```")] + #[cfg_attr(not(feature = "load-onnxruntime"), doc = "```compile_fail")] + /// # fn main() -> anyhow::Result<()> { + /// # use test_util::{ONNXRUNTIME_DYLIB_PATH, OPEN_JTALK_DIC_DIR}; + /// # + /// # const ACCELERATION_MODE: AccelerationMode = AccelerationMode::Cpu; + /// # + /// use std::sync::Arc; + /// + /// use voicevox_core::{ + /// blocking::{Onnxruntime, OpenJtalk, Synthesizer}, + /// AccelerationMode, InitializeOptions, + /// }; + /// + /// # if cfg!(windows) { + /// # // Windows\System32\onnxruntime.dllを回避 + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # } + /// let mut syntesizer = Synthesizer::new( + /// Onnxruntime::load_once().exec()?, + /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).unwrap()), + /// &InitializeOptions { + /// acceleration_mode: ACCELERATION_MODE, + /// ..Default::default() + /// }, + /// )?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn new( + onnxruntime: &'static crate::blocking::Onnxruntime, + open_jtalk: O, + options: &InitializeOptions, + ) -> crate::Result { + Inner::new(onnxruntime, open_jtalk, options).map(Self) + } + + pub fn onnxruntime(&self) -> &'static crate::blocking::Onnxruntime { + self.0.onnxruntime() + } + + /// ハードウェアアクセラレーションがGPUモードか判定する。 + pub fn is_gpu_mode(&self) -> bool { + self.0.is_gpu_mode() + } + + /// 
音声モデルを読み込む。 + pub fn load_voice_model( + &self, + model: &crate::blocking::VoiceModelFile, + ) -> crate::Result<()> { + self.0.load_voice_model(model.inner()).block_on() + } + + /// 音声モデルの読み込みを解除する。 + pub fn unload_voice_model(&self, voice_model_id: VoiceModelId) -> crate::Result<()> { + self.0.unload_voice_model(voice_model_id) + } + + /// 指定したIDの音声モデルが読み込まれているか判定する。 + pub fn is_loaded_voice_model(&self, voice_model_id: VoiceModelId) -> bool { + self.0.is_loaded_voice_model(voice_model_id) + } + + #[doc(hidden)] + pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + self.0.is_loaded_model_by_style_id(style_id) + } + + /// 今読み込んでいる音声モデルのメタ情報を返す。 + pub fn metas(&self) -> VoiceModelMeta { + self.0.metas() + } + + /// AudioQueryから音声合成用の中間表現を生成する。 + pub fn precompute_render( + &self, + audio_query: &AudioQuery, + style_id: StyleId, + options: &SynthesisOptions, + ) -> crate::Result { + self.0 + .precompute_render(audio_query, style_id, options) + .block_on() + } + + /// 中間表現から16bit PCMで音声波形を生成する。 + pub fn render( + &self, + audio: &AudioFeature, + start: usize, + end: usize, + ) -> crate::Result> { + self.0.render(audio, start, end).block_on() + } + + /// AudioQueryから直接WAVフォーマットで音声波形を生成する。 + pub fn synthesis( + &self, + audio_query: &AudioQuery, + style_id: StyleId, + options: &SynthesisOptions, + ) -> crate::Result> { + self.0.synthesis(audio_query, style_id, options).block_on() + } + + /// AquesTalk風記法からAccentPhrase (アクセント句)の配列を生成する。 + /// + /// # Example + /// + /// ``` + /// # fn main() -> anyhow::Result<()> { + /// # use pollster::FutureExt as _; + /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; + /// # + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .block_on()? 
+ /// # .into_blocking(); + /// # + /// use voicevox_core::StyleId; + /// + /// let accent_phrases = synthesizer + /// .create_accent_phrases_from_kana("コンニチワ'", StyleId::new(302))?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn create_accent_phrases_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> crate::Result> { + self.0 + .create_accent_phrases_from_kana(kana, style_id) + .block_on() + } + + /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 + pub fn replace_mora_data( + &self, + accent_phrases: &[AccentPhrase], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .replace_mora_data(accent_phrases, style_id) + .block_on() + } + + /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 + pub fn replace_phoneme_length( + &self, + accent_phrases: &[AccentPhrase], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .replace_phoneme_length(accent_phrases, style_id) + .block_on() + } + + /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 + pub fn replace_mora_pitch( + &self, + accent_phrases: &[AccentPhrase], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .replace_mora_pitch(accent_phrases, style_id) + .block_on() + } + + /// AquesTalk風記法から[AudioQuery]を生成する。 + /// + /// # Example + /// + /// ``` + /// # fn main() -> anyhow::Result<()> { + /// # use pollster::FutureExt as _; + /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; + /// # + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .block_on()? + /// # .into_blocking(); + /// # + /// use voicevox_core::StyleId; + /// + /// let audio_query = synthesizer.audio_query_from_kana("コンニチワ'", StyleId::new(302))?; + /// # + /// # Ok(()) + /// # } + /// ``` + /// + /// [AudioQuery]: crate::AudioQuery + pub fn audio_query_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> crate::Result { + self.0.audio_query_from_kana(kana, style_id).block_on() + } + + /// AquesTalk風記法から音声合成を行う。 + pub fn tts_from_kana( + &self, + kana: &str, + style_id: StyleId, + options: &TtsOptions, + ) -> crate::Result> { + self.0.tts_from_kana(kana, style_id, options).block_on() + } + } + impl self::Synthesizer { + /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 + /// + /// # Example + /// + /// ``` + /// # fn main() -> anyhow::Result<()> { + /// # use pollster::FutureExt as _; + /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; + /// # + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .block_on()? 
+ /// # .into_blocking(); + /// # + /// use voicevox_core::StyleId; + /// + /// let accent_phrases = synthesizer.create_accent_phrases("こんにちは", StyleId::new(302))?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn create_accent_phrases( + &self, + text: &str, + style_id: StyleId, + ) -> crate::Result> { + self.0.create_accent_phrases(text, style_id).block_on() + } + + /// 日本語のテキストから[AudioQuery]を生成する。 + /// + /// # Examples + /// + /// ``` + /// # fn main() -> anyhow::Result<()> { + /// # use pollster::FutureExt as _; + /// # use voicevox_core::__internal::doctest_fixtures::IntoBlocking as _; + /// # + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .block_on()? + /// # .into_blocking(); + /// # + /// use voicevox_core::StyleId; + /// + /// let audio_query = synthesizer.audio_query("こんにちは", StyleId::new(302))?; + /// # + /// # Ok(()) + /// # } + /// ``` + /// + /// [AudioQuery]: crate::AudioQuery + pub fn audio_query(&self, text: &str, style_id: StyleId) -> crate::Result { + self.0.audio_query(text, style_id).block_on() + } + + /// 日本語のテキストから音声合成を行う。 + pub fn tts( + &self, + text: &str, + style_id: StyleId, + options: &TtsOptions, + ) -> crate::Result> { + self.0.tts(text, style_id, options).block_on() + } + } + + #[ext(PerformInference)] + impl self::Synthesizer<()> { + pub fn predict_duration( + &self, + phoneme_vector: &[i64], + style_id: StyleId, + ) -> crate::Result> { + self.0.predict_duration(phoneme_vector, style_id).block_on() + } + + pub fn predict_intonation( + &self, + length: usize, + vowel_phoneme_vector: &[i64], + consonant_phoneme_vector: &[i64], + start_accent_vector: &[i64], + end_accent_vector: &[i64], + start_accent_phrase_vector: &[i64], + end_accent_phrase_vector: &[i64], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .predict_intonation( + length, + vowel_phoneme_vector, + consonant_phoneme_vector, + start_accent_vector, + end_accent_vector, + start_accent_phrase_vector, + end_accent_phrase_vector, + style_id, + ) + .block_on() + } + + pub fn generate_full_intermediate( + &self, + length: usize, + phoneme_size: usize, + f0: &[f32], + phoneme_vector: &[f32], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .generate_full_intermediate(length, phoneme_size, f0, phoneme_vector, style_id) + .block_on() + } + + pub fn render_audio_segment( + &self, + spec: ndarray::Array2, + style_id: StyleId, + ) -> crate::Result> { + self.0.render_audio_segment(spec, style_id).block_on() + } + + pub fn decode( + &self, + length: usize, + phoneme_size: usize, + f0: &[f32], + phoneme_vector: &[f32], + style_id: StyleId, + ) -> crate::Result> { + self.0 + .decode(length, phoneme_size, f0, phoneme_vector, style_id) + .block_on() + } + } +} + +pub(crate) mod nonblocking { use easy_ext::ext; use crate::{ - AccentPhrase, AudioQuery, FullcontextExtractor, Result, StyleId, SynthesisOptions, - VoiceModelId, VoiceModelMeta, + asyncs::BlockingThreadPool, AccentPhrase, AudioQuery, FullcontextExtractor, Result, + StyleId, SynthesisOptions, VoiceModelId, VoiceModelMeta, }; - use super::{InitializeOptions, TtsOptions}; + use super::{inner::Inner, InitializeOptions, TtsOptions}; /// 音声シンセサイザ。 /// @@ -1248,8 +1603,7 @@ pub(crate) mod nonblocking { /// /// [blocking]: https://docs.rs/crate/blocking /// [`nonblocking`モジュールのドキュメント]: crate::nonblocking - #[derive(Clone)] - pub 
struct Synthesizer(pub(super) Arc>); + pub struct Synthesizer(pub(super) Inner); impl self::Synthesizer { /// `Synthesizer`をコンストラクトする。 @@ -1294,7 +1648,7 @@ pub(crate) mod nonblocking { open_jtalk: O, options: &InitializeOptions, ) -> Result { - super::blocking::Synthesizer::new(&onnxruntime.0, open_jtalk, options) + Inner::new(&onnxruntime.0, open_jtalk, options) .map(Into::into) .map(Self) } @@ -1313,8 +1667,7 @@ pub(crate) mod nonblocking { &self, model: &crate::nonblocking::VoiceModelFile, ) -> Result<()> { - let model_bytes = &model.read_inference_models().await?; - self.0.status.insert_model(model.header(), model_bytes) + self.0.load_voice_model(model.inner()).await } /// 音声モデルの読み込みを解除する。 @@ -1344,12 +1697,7 @@ pub(crate) mod nonblocking { style_id: StyleId, options: &SynthesisOptions, ) -> Result> { - let blocking = self.0.clone(); - let audio_query = audio_query.clone(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.synthesis(&audio_query, style_id, &options)) - .await + self.0.synthesis(audio_query, style_id, options).await } /// AquesTalk風記法からAccentPhrase (アクセント句)の配列を生成する。 @@ -1381,11 +1729,7 @@ pub(crate) mod nonblocking { kana: &str, style_id: StyleId, ) -> Result> { - let blocking = self.0.clone(); - let kana = kana.to_owned(); - - crate::task::asyncify(move || blocking.create_accent_phrases_from_kana(&kana, style_id)) - .await + self.0.create_accent_phrases_from_kana(kana, style_id).await } /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 @@ -1394,11 +1738,7 @@ pub(crate) mod nonblocking { accent_phrases: &[AccentPhrase], style_id: StyleId, ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); - - crate::task::asyncify(move || blocking.replace_mora_data(&accent_phrases, style_id)) - .await + self.0.replace_mora_data(accent_phrases, style_id).await } /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 @@ -1407,13 +1747,9 @@ pub(crate) mod nonblocking { accent_phrases: &[AccentPhrase], style_id: StyleId, ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); - - crate::task::asyncify(move || { - blocking.replace_phoneme_length(&accent_phrases, style_id) - }) - .await + self.0 + .replace_phoneme_length(accent_phrases, style_id) + .await } /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 @@ -1422,11 +1758,7 @@ pub(crate) mod nonblocking { accent_phrases: &[AccentPhrase], style_id: StyleId, ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); - - crate::task::asyncify(move || blocking.replace_mora_pitch(&accent_phrases, style_id)) - .await + self.0.replace_mora_pitch(accent_phrases, style_id).await } /// AquesTalk風記法から[AudioQuery]を生成する。 @@ -1460,10 +1792,7 @@ pub(crate) mod nonblocking { kana: &str, style_id: StyleId, ) -> Result { - let blocking = self.0.clone(); - let kana = kana.to_owned(); - - crate::task::asyncify(move || blocking.audio_query_from_kana(&kana, style_id)).await + self.0.audio_query_from_kana(kana, style_id).await } /// AquesTalk風記法から音声合成を行う。 @@ -1473,11 +1802,7 @@ pub(crate) mod nonblocking { style_id: StyleId, options: &TtsOptions, ) -> Result> { - let blocking = self.0.clone(); - let kana = kana.to_owned(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.tts_from_kana(&kana, style_id, &options)).await + self.0.tts_from_kana(kana, style_id, options).await } } @@ -1511,10 +1836,7 @@ pub(crate) mod nonblocking { text: &str, style_id: StyleId, ) -> Result> { - let blocking = self.0.clone(); - let text = 
text.to_owned(); - - crate::task::asyncify(move || blocking.create_accent_phrases(&text, style_id)).await + self.0.create_accent_phrases(text, style_id).await } /// 日本語のテキストから[AudioQuery]を生成する。 @@ -1544,10 +1866,7 @@ pub(crate) mod nonblocking { /// /// [AudioQuery]: crate::AudioQuery pub async fn audio_query(&self, text: &str, style_id: StyleId) -> Result { - let blocking = self.0.clone(); - let text = text.to_owned(); - - crate::task::asyncify(move || blocking.audio_query(&text, style_id)).await + self.0.audio_query(text, style_id).await } /// 日本語のテキストから音声合成を行う。 @@ -1557,26 +1876,21 @@ pub(crate) mod nonblocking { style_id: StyleId, options: &TtsOptions, ) -> Result> { - let blocking = self.0.clone(); - let text = text.to_owned(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.tts(&text, style_id, &options)).await + self.0.tts(text, style_id, options).await } } #[ext(IntoBlocking)] impl self::Synthesizer { - pub fn into_blocking(self) -> Arc> { - self.0 + pub fn into_blocking(self) -> super::blocking::Synthesizer { + super::blocking::Synthesizer(self.0.into()) } } } #[cfg(test)] mod tests { - - use super::{blocking::PerformInference as _, AccelerationMode, InitializeOptions}; + use super::{AccelerationMode, InitializeOptions}; use crate::{engine::Mora, macros::tests::assert_debug_fmt_eq, AccentPhrase, Result, StyleId}; use ::test_util::OPEN_JTALK_DIC_DIR; use rstest::rstest; @@ -1686,7 +2000,8 @@ mod tests { let result = syntesizer .0 - .predict_duration(&phoneme_vector, StyleId::new(1)); + .predict_duration(&phoneme_vector, StyleId::new(1)) + .await; assert!(result.is_ok(), "{result:?}"); assert_eq!(result.unwrap().len(), phoneme_vector.len()); @@ -1719,16 +2034,19 @@ mod tests { let start_accent_phrase_vector = [0, 1, 0, 0, 0]; let end_accent_phrase_vector = [0, 0, 0, 1, 0]; - let result = syntesizer.0.predict_intonation( - vowel_phoneme_vector.len(), - &vowel_phoneme_vector, - &consonant_phoneme_vector, - &start_accent_vector, - &end_accent_vector, - &start_accent_phrase_vector, - &end_accent_phrase_vector, - StyleId::new(1), - ); + let result = syntesizer + .0 + .predict_intonation( + vowel_phoneme_vector.len(), + &vowel_phoneme_vector, + &consonant_phoneme_vector, + &start_accent_vector, + &end_accent_vector, + &start_accent_phrase_vector, + &end_accent_phrase_vector, + StyleId::new(1), + ) + .await; assert!(result.is_ok(), "{result:?}"); assert_eq!(result.unwrap().len(), vowel_phoneme_vector.len()); @@ -1777,7 +2095,8 @@ mod tests { let result = syntesizer .0 - .decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1)); + .decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1)) + .await; assert!(result.is_ok(), "{result:?}"); assert_eq!(result.unwrap().len(), F0_LENGTH * 256); diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index f31b8ecb1..c2920398b 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -62,7 +62,7 @@ impl VoiceModelId { } #[self_referencing] -struct Inner { +pub(crate) struct Inner { header: VoiceModelHeader, #[borrows(header)] @@ -182,11 +182,11 @@ impl Inner { &self.borrow_header().metas } - fn header(&self) -> &VoiceModelHeader { + pub(crate) fn header(&self) -> &VoiceModelHeader { self.borrow_header() } - async fn read_inference_models( + pub(crate) async fn read_inference_models( &self, ) -> LoadModelResult> { let path = &self.borrow_header().path; @@ -412,12 +412,9 @@ impl InferenceDomainMap { pub(crate) mod blocking { use 
std::path::Path; - use crate::{ - asyncs::SingleTasked, error::LoadModelResult, future::FutureExt as _, - infer::domains::InferenceDomainMap, VoiceModelMeta, - }; + use crate::{asyncs::SingleTasked, future::FutureExt as _, VoiceModelMeta}; - use super::{Inner, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; + use super::{Inner, VoiceModelId}; /// 音声モデルファイル。 /// @@ -425,17 +422,15 @@ pub(crate) mod blocking { pub struct VoiceModelFile(Inner); impl self::VoiceModelFile { - pub(crate) fn read_inference_models( - &self, - ) -> LoadModelResult> { - self.0.read_inference_models().block_on() - } - /// VVMファイルを開く。 pub fn open(path: impl AsRef) -> crate::Result { Inner::open(path).block_on().map(Self) } + pub(crate) fn inner(&self) -> &Inner { + &self.0 + } + /// ID。 pub fn id(&self) -> VoiceModelId { self.0.id() @@ -445,22 +440,15 @@ pub(crate) mod blocking { pub fn metas(&self) -> &VoiceModelMeta { self.0.metas() } - - pub(crate) fn header(&self) -> &VoiceModelHeader { - self.0.header() - } } } pub(crate) mod nonblocking { use std::path::Path; - use crate::{ - asyncs::BlockingThreadPool, error::LoadModelResult, infer::domains::InferenceDomainMap, - Result, VoiceModelMeta, - }; + use crate::{asyncs::BlockingThreadPool, Result, VoiceModelMeta}; - use super::{Inner, ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId}; + use super::{Inner, VoiceModelId}; /// 音声モデルファイル。 /// @@ -475,12 +463,6 @@ pub(crate) mod nonblocking { pub struct VoiceModelFile(Inner); impl self::VoiceModelFile { - pub(crate) async fn read_inference_models( - &self, - ) -> LoadModelResult> { - self.0.read_inference_models().await - } - /// VVMファイルを開く。 pub async fn open(path: impl AsRef) -> Result { Inner::open(path).await.map(Self) @@ -491,6 +473,10 @@ pub(crate) mod nonblocking { self.0.into_heads().zip.into_inner().close().await; } + pub(crate) fn inner(&self) -> &Inner { + &self.0 + } + /// ID。 pub fn id(&self) -> VoiceModelId { self.0.id() @@ -500,10 +486,6 @@ pub(crate) mod nonblocking { pub fn metas(&self) -> &VoiceModelMeta { self.0.metas() } - - pub(crate) fn header(&self) -> &VoiceModelHeader { - self.0.header() - } } } diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 602824543..d5168b3c5 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -53,11 +53,11 @@ metas = ''' stderr.windows = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' stderr.unix = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' [compatible_engine_load_model_before_initialize] @@ -123,11 +123,11 @@ output."こんにちは、音声合成の世界へようこそ".wav_length = 176 stderr.windows = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' stderr.unix = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO 
voicevox_core::synthesizer::inner: CPUを利用します ''' [synthesizer_new_output_json] @@ -185,11 +185,11 @@ metas = ''' stderr.windows = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' stderr.unix = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' [tts_via_audio_query] @@ -197,22 +197,22 @@ output."こんにちは、音声合成の世界へようこそ".wav_length = 176 stderr.windows = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' stderr.unix = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' [user_dict_load] stderr.windows = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' stderr.unix = ''' {timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' -{timestamp} INFO voicevox_core::synthesizer::blocking: CPUを利用します +{timestamp} INFO voicevox_core::synthesizer::inner: CPUを利用します ''' [user_dict_manipulate] diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index 58791ead1..a2d1c2475 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -1055,9 +1055,11 @@ mod asyncio { #[pyclass] pub(crate) struct Synthesizer { + // FIXME: `Arc`ではなく、`Arc>`を + // `clone`する synthesizer: Arc< Closable< - voicevox_core::nonblocking::Synthesizer, + Arc>, Self, Tokio, >, @@ -1088,7 +1090,7 @@ mod asyncio { cpu_num_threads, }, ); - let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?; + let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?.into(); let synthesizer = Closable::new(synthesizer).into(); Ok(Self { synthesizer }) }
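
Note: to make the shape of the refactor concrete, here is a minimal, self-contained sketch of the `Inner<_, A: Async>` pattern the patch adopts. It is an illustration under assumptions, not the actual voicevox_core source: the item names (`Inner`, `SingleTasked`, `BlockingThreadPool`, `predict`, the two facade structs) are simplified stand-ins, the single `Async` trait below plays the role of the crate's `Async`/`AsyncForOnnxruntime` pair, and the external `blocking` and `pollster` crates stand in for the crate's internal thread-pool and `FutureExt::block_on` machinery.

```rust
// Illustrative sketch only; assumes `blocking` and `pollster` as dependencies.
use std::marker::PhantomData;

/// Strategy for dispatching CPU-bound work from async code.
trait Async {
    async fn unblock<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static;
}

/// Runs the closure inline; backs the blocking facade.
struct SingleTasked;

impl Async for SingleTasked {
    async fn unblock<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static,
    {
        f()
    }
}

/// Offloads the closure to a thread pool (here: the `blocking` crate);
/// backs the nonblocking facade.
struct BlockingThreadPool;

impl Async for BlockingThreadPool {
    async fn unblock<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static,
    {
        ::blocking::unblock(f).await
    }
}

/// The single shared implementation, generic over the dispatch strategy.
struct Inner<A: Async> {
    _marker: PhantomData<fn() -> A>,
}

impl<A: Async> Inner<A> {
    async fn predict(&self) -> u64 {
        // Stand-in for CPU-bound inference, routed through the strategy.
        A::unblock(|| (0..1_000u64).sum()).await
    }
}

/// Thin facades over the same `Inner`.
struct BlockingSynthesizer(Inner<SingleTasked>);
struct NonblockingSynthesizer(Inner<BlockingThreadPool>);

impl BlockingSynthesizer {
    fn predict(&self) -> u64 {
        // With `SingleTasked` the future completes on its first poll,
        // so a trivial executor such as `pollster` suffices.
        pollster::block_on(self.0.predict())
    }
}

impl NonblockingSynthesizer {
    async fn predict(&self) -> u64 {
        self.0.predict().await
    }
}

fn main() {
    let synth = BlockingSynthesizer(Inner { _marker: PhantomData });
    assert_eq!(synth.predict(), (0..1_000u64).sum::<u64>());
}
```

Both facades drive the one shared implementation; in the blocking path the future produced by `SingleTasked::unblock` never suspends, so the cost of going through `async` is a single poll, consistent with the commit message's expectation that any performance impact is small.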