diff --git a/.env.production b/.env.production index 1fe00d6ff2..7296599456 100644 --- a/.env.production +++ b/.env.production @@ -10,3 +10,4 @@ VITE_DEFAULT_ENGINE_INFOS=`[ } ]` VITE_GTM_CONTAINER_ID=GTM-DUMMY +CAPACITOR_ADDRESS= diff --git a/.env.test b/.env.test index a2c27972a3..107fa8087e 100644 --- a/.env.test +++ b/.env.test @@ -10,3 +10,4 @@ VITE_DEFAULT_ENGINE_INFOS=`[ } ]` VITE_GTM_CONTAINER_ID=GTM-DUMMY +CAPACITOR_ADDRESS= diff --git a/.gitignore b/.gitignore index c3d792f470..c6b6b82777 100644 --- a/.gitignore +++ b/.gitignore @@ -41,5 +41,5 @@ electron-builder.yml # generated licenses.json /*licenses.json -# generated speakerInfo.json -public/speakerInfos.json +# generated speakerInfo +public/speakerInfos diff --git a/README.md b/README.md index 5bc838bab9..cc224f2cab 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ npm ci ## 実行 以下のコマンドで Vite の開発サーバーを起動し、Capacitor をライブリロードモードでセットアップします。 +PC のプライベート IP アドレスは自動で取得されますが、手動で設定する場合は`.env` 内で `CAPACITOR_ADDRESS` を指定してください。 ```bash npm run cap:serve @@ -169,6 +170,7 @@ npm run test-watch:unit # 監視モード ### ブラウザ End to End テスト Electron の機能が不要な、UI や音声合成などの End to End テストを実行します。 + > **Note** > 一部のエンジンの設定を書き換えるテストは、CI(Github Actions)上でのみ実行されるようになっています。 diff --git a/android/app/build.gradle b/android/app/build.gradle index 26c2243d8c..cc6d810b5e 100644 --- a/android/app/build.gradle +++ b/android/app/build.gradle @@ -4,12 +4,6 @@ android { ndkVersion '25.2.9519653' namespace 'jp.hiroshiba.voicevox' - externalNativeBuild { - cmake { - path file('src/main/cpp/CMakeLists.txt') - version '3.22.1' - } - } compileSdkVersion rootProject.ext.compileSdkVersion defaultConfig { @@ -24,12 +18,6 @@ android { // Default: https://android.googlesource.com/platform/frameworks/base/+/282e181b58cf72b6ca770dc7ca5f91f135444502/tools/aapt/AaptAssets.cpp#61 ignoreAssetsPattern '!.svn:!.git:!.ds_store:!*.scc:.*:!CVS:!thumbs.db:!picasa.ini:!*~' } - externalNativeBuild { - cmake { - arguments '-DANDROID_STL=c++_shared' - } - } - ndk.abiFilters 'arm64-v8a', 'x86_64' testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" @@ -53,6 +41,25 @@ repositories { dirs '../capacitor-cordova-android-plugins/src/main/libs', 'libs' } } +def urlZipFile = { name, path, url -> + File zipFile = new File("$buildDir/download/${name}.zip") + zipFile.parentFile.mkdirs() + if (!zipFile.exists()) { + new URL(url).withInputStream { downloadStream -> + zipFile.withOutputStream { fileOut -> + fileOut << downloadStream + } + } + } + def tree = zipTree(zipFile) + def fileName = path.split("/")[-1] + def outFile = file("$buildDir/download/${fileName}") + tree.visit { file -> + if (file.getPath() != path) return + file.copyTo(outFile) + } + files(outFile) +} dependencies { implementation fileTree(include: ['*.jar'], dir: 'libs') @@ -66,7 +73,19 @@ dependencies { androidTestImplementation "androidx.test.espresso:espresso-core:$androidxEspressoCoreVersion" implementation project(':capacitor-cordova-android-plugins') - implementation group: 'com.microsoft.onnxruntime', name: 'onnxruntime-android', version: '1.14.0' + // TODO: ちゃんと公開されたらそれに置き換える + implementation urlZipFile("voicevoxcore-android", "jp/hiroshiba/voicevoxcore/voicevoxcore-android/0.15.0-preview.13/voicevoxcore-android-0.15.0-preview.13.aar", "https://github.com/VOICEVOX/voicevox_core/releases/download/0.15.0-preview.13/java_packages.zip") + + // https://mvnrepository.com/artifact/com.google.code.gson/gson + implementation group: 'com.google.code.gson', name: 'gson', version: "2.10.1" + + // https://mvnrepository.com/artifact/jakarta.validation/jakarta.validation-api + implementation group: 'jakarta.validation', name: 'jakarta.validation-api', version: "3.0.2" + + // https://mvnrepository.com/artifact/jakarta.annotation/jakarta.annotation-api + implementation group: 'jakarta.annotation', name: 'jakarta.annotation-api', version: "2.1.1" + + implementation group: 'com.microsoft.onnxruntime', name: 'onnxruntime-android', version: "1.14.0" } apply from: 'capacitor.build.gradle' diff --git a/android/app/src/main/cpp/CMakeLists.txt b/android/app/src/main/cpp/CMakeLists.txt deleted file mode 100644 index dff7f08f9f..0000000000 --- a/android/app/src/main/cpp/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ - -# For more information about using CMake with Android Studio, read the -# documentation: https://d.android.com/studio/projects/add-native-code.html - -# Sets the minimum version of CMake required to build the native library. - -cmake_minimum_required(VERSION 3.22.1) - -# Declares and names the project. - -project("voicevox_core_wrapper") - -# Creates and names a library, sets it as either STATIC -# or SHARED, and provides the relative paths to its source code. -# You can define multiple libraries, and CMake builds them for you. -# Gradle automatically packages shared libraries with your APK. - -file(GLOB voicevox_core_wrapper_files ${CMAKE_CURRENT_SOURCE_DIR}/voicevox_core_wrapper/*.cpp) - -add_library( # Sets the name of the library. - voicevox_core_wrapper - - # Sets the library as a shared library. - SHARED - - # Provides a relative path to your source file(s). - ${voicevox_core_wrapper_files}) - -# Searches for a specified prebuilt library and stores the path as a -# variable. Because CMake includes system libraries in the search path by -# default, you only need to specify the name of the public NDK library -# you want to add. CMake verifies that the library exists before -# completing its build. - -find_library( # Sets the name of the path variable. - log-lib - - # Specifies the name of the NDK library that - # you want CMake to locate. - log) - -# Specifies libraries CMake should link to your target library. You -# can link multiple libraries, such as libraries you define in this -# build script, prebuilt third-party libraries, or system libraries. - - -include_directories( - "${CMAKE_CURRENT_SOURCE_DIR}/../jniLibs/include" -) -target_link_libraries( # Specifies the target library. - voicevox_core_wrapper - - # Links the target library to the log library - # included in the NDK. - ${log-lib} - ) diff --git a/android/app/src/main/cpp/voicevox_core_wrapper/core_caller.cpp b/android/app/src/main/cpp/voicevox_core_wrapper/core_caller.cpp deleted file mode 100644 index 95d4b2a9cf..0000000000 --- a/android/app/src/main/cpp/voicevox_core_wrapper/core_caller.cpp +++ /dev/null @@ -1,181 +0,0 @@ -#include -#include "voicevox_core.h" - -typedef struct VoicevoxInitializeOptions (*voicevox_make_default_initialize_options_t)(); - -typedef VoicevoxResultCode (*voicevox_initialize_t)(struct VoicevoxInitializeOptions options); - -typedef const char *(*voicevox_get_version_t)(); - -typedef VoicevoxResultCode (*voicevox_load_model_t)(uint32_t speaker_id); - -typedef bool (*voicevox_is_gpu_mode_t)(); - -typedef bool (*voicevox_is_model_loaded_t)(uint32_t speaker_id); - -typedef void (*voicevox_finalize_t)(); - -typedef const char *(*voicevox_get_metas_json_t)(); - -typedef const char *(*voicevox_get_supported_devices_json_t)(); - -typedef VoicevoxResultCode (*voicevox_predict_duration_t)(uintptr_t length, - int64_t *phoneme_vector, - uint32_t speaker_id, - uintptr_t *output_predict_duration_data_length, - float **output_predict_duration_data); - -typedef void (*voicevox_predict_duration_data_free_t)(float *predict_duration_data); - -typedef VoicevoxResultCode (*voicevox_predict_intonation_t)(uintptr_t length, - int64_t *vowel_phoneme_vector, - int64_t *consonant_phoneme_vector, - int64_t *start_accent_vector, - int64_t *end_accent_vector, - int64_t *start_accent_phrase_vector, - int64_t *end_accent_phrase_vector, - uint32_t speaker_id, - uintptr_t *output_predict_intonation_data_length, - float **output_predict_intonation_data); - -typedef void (*voicevox_predict_intonation_data_free_t)(float *predict_intonation_data); - -typedef VoicevoxResultCode (*voicevox_decode_t)(uintptr_t length, - uintptr_t phoneme_size, - float *f0, - float *phoneme_vector, - uint32_t speaker_id, - uintptr_t *output_decode_data_length, - float **output_decode_data); - -typedef void (*voicevox_decode_data_free_t)(float *decode_data); - -typedef struct VoicevoxAudioQueryOptions (*voicevox_make_default_audio_query_options_t)(); - -typedef VoicevoxResultCode (*voicevox_audio_query_t)(const char *text, - uint32_t speaker_id, - struct VoicevoxAudioQueryOptions options, - char **output_audio_query_json); - -typedef struct VoicevoxAccentPhrasesOptions (*voicevox_make_default_accent_phrases_options_t)(); - -typedef VoicevoxResultCode (*voicevox_accent_phrases_t)(const char *text, - uint32_t speaker_id, - struct VoicevoxAccentPhrasesOptions options, - char **output_accent_phrases_json); - -typedef VoicevoxResultCode (*voicevox_mora_length_t)(const char *accent_phrases_json, - uint32_t speaker_id, - char **output_accent_phrases_json); - -typedef VoicevoxResultCode (*voicevox_mora_pitch_t)(const char *accent_phrases_json, - uint32_t speaker_id, - char **output_accent_phrases_json); - -typedef VoicevoxResultCode (*voicevox_mora_data_t)(const char *accent_phrases_json, - uint32_t speaker_id, - char **output_accent_phrases_json); - -typedef struct VoicevoxSynthesisOptions (*voicevox_make_default_synthesis_options_t)(); - -typedef VoicevoxResultCode (*voicevox_synthesis_t)(const char *audio_query_json, - uint32_t speaker_id, - struct VoicevoxSynthesisOptions options, - uintptr_t *output_wav_length, - uint8_t **output_wav); - -typedef struct VoicevoxTtsOptions (*voicevox_make_default_tts_options_t)(); - -typedef VoicevoxResultCode (*voicevox_tts_t)(const char *text, - uint32_t speaker_id, - struct VoicevoxTtsOptions options, - uintptr_t *output_wav_length, - uint8_t **output_wav); - -typedef void (*voicevox_audio_query_json_free_t)(char *audio_query_json); - -typedef void (*voicevox_accent_phrases_json_free_t)(char *accented_phrase_json); - -typedef void (*voicevox_wav_free_t)(uint8_t *wav); - -typedef const char *(*voicevox_error_result_to_message_t)(VoicevoxResultCode result_code); - -class VoicevoxCore { -public: - VoicevoxCore() { - auto core = - dlopen("libvoicevox_core.so", RTLD_LAZY); - voicevox_make_default_initialize_options = (voicevox_make_default_initialize_options_t) dlsym(core, - "voicevox_make_default_initialize_options"); - voicevox_initialize = (voicevox_initialize_t) dlsym(core, "voicevox_initialize"); - voicevox_get_version = (voicevox_get_version_t) dlsym(core, "voicevox_get_version"); - voicevox_load_model = (voicevox_load_model_t) dlsym(core, "voicevox_load_model"); - voicevox_is_gpu_mode = (voicevox_is_gpu_mode_t) dlsym(core, "voicevox_is_gpu_mode"); - voicevox_is_model_loaded = (voicevox_is_model_loaded_t) dlsym(core, "voicevox_is_model_loaded"); - voicevox_finalize = (voicevox_finalize_t) dlsym(core, "voicevox_finalize"); - voicevox_get_metas_json = (voicevox_get_metas_json_t) dlsym(core, "voicevox_get_metas_json"); - voicevox_get_supported_devices_json = (voicevox_get_supported_devices_json_t) dlsym(core, - "voicevox_get_supported_devices_json"); - voicevox_predict_duration = (voicevox_predict_duration_t) dlsym(core, "voicevox_predict_duration"); - voicevox_predict_duration_data_free = (voicevox_predict_duration_data_free_t) dlsym(core, - "voicevox_predict_duration_data_free"); - voicevox_predict_intonation = (voicevox_predict_intonation_t) dlsym(core, "voicevox_predict_intonation"); - voicevox_predict_intonation_data_free = (voicevox_predict_intonation_data_free_t) dlsym(core, - "voicevox_predict_intonation_data_free"); - voicevox_decode = (voicevox_decode_t) dlsym(core, "voicevox_decode"); - voicevox_decode_data_free = (voicevox_decode_data_free_t) dlsym(core, "voicevox_decode_data_free"); - voicevox_make_default_audio_query_options = (voicevox_make_default_audio_query_options_t) dlsym(core, - "voicevox_make_default_audio_query_options"); - voicevox_audio_query = (voicevox_audio_query_t) dlsym(core, "voicevox_audio_query"); - voicevox_make_default_accent_phrases_options = (voicevox_make_default_accent_phrases_options_t) dlsym(core, - "voicevox_make_default_accent_phrases_options"); - voicevox_accent_phrases = (voicevox_accent_phrases_t) dlsym(core, "voicevox_accent_phrases"); - voicevox_mora_length = (voicevox_mora_length_t) dlsym(core, "voicevox_mora_length"); - voicevox_mora_pitch = (voicevox_mora_pitch_t) dlsym(core, "voicevox_mora_pitch"); - voicevox_mora_data = (voicevox_mora_data_t) dlsym(core, "voicevox_mora_data"); - voicevox_make_default_synthesis_options = (voicevox_make_default_synthesis_options_t) dlsym(core, - "voicevox_make_default_synthesis_options"); - voicevox_synthesis = (voicevox_synthesis_t) dlsym(core, "voicevox_synthesis"); - voicevox_make_default_tts_options = (voicevox_make_default_tts_options_t) dlsym(core, - "voicevox_make_default_tts_options"); - voicevox_tts = (voicevox_tts_t) dlsym(core, "voicevox_tts"); - voicevox_audio_query_json_free = (voicevox_audio_query_json_free_t) dlsym(core, - "voicevox_audio_query_json_free"); - voicevox_accent_phrases_json_free = (voicevox_accent_phrases_json_free_t) dlsym(core, - "voicevox_accent_phrases_json_free"); - voicevox_wav_free = (voicevox_wav_free_t) dlsym(core, "voicevox_wav_free"); - voicevox_error_result_to_message = (voicevox_error_result_to_message_t) dlsym(core, - "voicevox_error_result_to_message"); - } - - voicevox_make_default_initialize_options_t voicevox_make_default_initialize_options; - voicevox_initialize_t voicevox_initialize; - voicevox_get_version_t voicevox_get_version; - voicevox_load_model_t voicevox_load_model; - voicevox_is_gpu_mode_t voicevox_is_gpu_mode; - voicevox_is_model_loaded_t voicevox_is_model_loaded; - voicevox_finalize_t voicevox_finalize; - voicevox_get_metas_json_t voicevox_get_metas_json; - voicevox_get_supported_devices_json_t voicevox_get_supported_devices_json; - voicevox_predict_duration_t voicevox_predict_duration; - voicevox_predict_duration_data_free_t voicevox_predict_duration_data_free; - voicevox_predict_intonation_t voicevox_predict_intonation; - voicevox_predict_intonation_data_free_t voicevox_predict_intonation_data_free; - voicevox_decode_t voicevox_decode; - voicevox_decode_data_free_t voicevox_decode_data_free; - voicevox_make_default_audio_query_options_t voicevox_make_default_audio_query_options; - voicevox_audio_query_t voicevox_audio_query; - voicevox_make_default_accent_phrases_options_t voicevox_make_default_accent_phrases_options; - voicevox_accent_phrases_t voicevox_accent_phrases; - voicevox_mora_length_t voicevox_mora_length; - voicevox_mora_pitch_t voicevox_mora_pitch; - voicevox_mora_data_t voicevox_mora_data; - voicevox_make_default_synthesis_options_t voicevox_make_default_synthesis_options; - voicevox_synthesis_t voicevox_synthesis; - voicevox_make_default_tts_options_t voicevox_make_default_tts_options; - voicevox_tts_t voicevox_tts; - voicevox_audio_query_json_free_t voicevox_audio_query_json_free; - voicevox_accent_phrases_json_free_t voicevox_accent_phrases_json_free; - voicevox_wav_free_t voicevox_wav_free; - voicevox_error_result_to_message_t voicevox_error_result_to_message; -}; diff --git a/android/app/src/main/cpp/voicevox_core_wrapper/logger.cpp b/android/app/src/main/cpp/voicevox_core_wrapper/logger.cpp deleted file mode 100644 index ffb792724e..0000000000 --- a/android/app/src/main/cpp/voicevox_core_wrapper/logger.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// https://stackoverflow.com/questions/10531050/redirect-stdout-to-logcat-in-android-ndk -#include -#include -#include - -struct LogParams { - int fd; - android_LogPriority priority; -}; -static LogParams stdoutParams; -static LogParams stderrParams; - -static void *logInBackground(void *rawParams) { - LogParams params = *static_cast(rawParams); - ssize_t rdsz; - char buf[1024]; - while ((rdsz = read(params.fd, buf, sizeof buf - 1)) > 0) { - if (buf[rdsz - 1] == '\n') --rdsz; - buf[rdsz] = 0; - __android_log_write(params.priority, "voicevox_core", buf); - } - return nullptr; -} - -static void startLogger() { - int stdoutPfd[2]; - int stderrPfd[2]; - pthread_t thr[2]; - pipe(stdoutPfd); - dup2(stdoutPfd[1], 1); - pipe(stderrPfd); - dup2(stderrPfd[1], 2); - - stdoutParams = { - .fd = stdoutPfd[0], - .priority = ANDROID_LOG_INFO - }; - - if (pthread_create( - &thr[0], nullptr, logInBackground, &stdoutParams - ) != 0) - return; - pthread_detach(thr[0]); - - stderrParams = { - .fd = stderrPfd[0], - .priority = ANDROID_LOG_WARN - }; - - if (pthread_create( - &thr[1], nullptr, logInBackground, &stderrParams - ) != 0) - return; - pthread_detach(thr[1]); -} diff --git a/android/app/src/main/cpp/voicevox_core_wrapper/main.cpp b/android/app/src/main/cpp/voicevox_core_wrapper/main.cpp deleted file mode 100644 index 533d418884..0000000000 --- a/android/app/src/main/cpp/voicevox_core_wrapper/main.cpp +++ /dev/null @@ -1,347 +0,0 @@ -#include -#include -#include -#include -#include "core_caller.cpp" -#include "logger.cpp" - -#define LOG_TAG "voicevox_core_wrapper" - -VoicevoxCore *voicevoxCore; - -bool assertCoreLoaded(JNIEnv *env) { - if (!voicevoxCore) { - __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "voicevoxCore is not loaded"); - jclass jExceptionClass = env->FindClass("java/lang/RuntimeException"); - env->ThrowNew(jExceptionClass, "voicevoxCore is not loaded"); - return false; - } - return true; -} - -// 成功だったらtrueを返す -bool throwExceptionIfError(JNIEnv *env, VoicevoxResultCode code) { - if (!voicevoxCore) { - return false; - } - if (code == 0) { - return false; - } - jclass jExceptionClass = env->FindClass("jp/hiroshiba/voicevox/VoicevoxCore$VoicevoxException"); - auto message = voicevoxCore->voicevox_error_result_to_message(code); - env->ThrowNew(jExceptionClass, message); - return true; -} - - -extern "C" -JNIEXPORT void JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_loadLibrary(JNIEnv *env, __attribute__((unused)) jobject thiz) { - __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "loadLibrary"); - voicevoxCore = new VoicevoxCore(); - startLogger(); - - if (!voicevoxCore) { - jclass jExceptionClass = env->FindClass("java/lang/RuntimeException"); - auto error = std::string(dlerror()); - env->ThrowNew(jExceptionClass, (std::string("loadLibrary failed: ") + error).c_str()); - return; - } - __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "loadLibrary success"); -} - -extern "C" -JNIEXPORT jstring -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxGetSupportedDevicesJson( - JNIEnv *env, - __attribute__((unused)) jobject thiz -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - return env-> - NewStringUTF(voicevoxCore->voicevox_get_supported_devices_json()); -} - -extern "C" -JNIEXPORT jstring -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxGetVersion( - JNIEnv *env, - __attribute__((unused)) jobject thiz -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - return env-> - NewStringUTF(voicevoxCore->voicevox_get_version()); -} - -extern "C" -JNIEXPORT jstring -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxGetMetasJson( - JNIEnv *env, - __attribute__((unused)) jobject thiz -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - return env-> - NewStringUTF(voicevoxCore->voicevox_get_metas_json()); -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxErrorResultToMessage( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jint status_code -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto message = voicevoxCore->voicevox_error_result_to_message(static_cast(status_code)); - - return env->NewStringUTF(message); -} - -extern "C" -JNIEXPORT void JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxInitialize( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring openJtalkDictPath -) { - if (!assertCoreLoaded(env)) { - return; - } - - auto openJtalkDictPathStr = env->GetStringUTFChars(openJtalkDictPath, nullptr); - auto options = voicevoxCore->voicevox_make_default_initialize_options(); - options.open_jtalk_dict_dir = openJtalkDictPathStr; - - auto result = voicevoxCore->voicevox_initialize(options); - env->ReleaseStringUTFChars(openJtalkDictPath, openJtalkDictPathStr); - - throwExceptionIfError(env, result); -} - -extern "C" -JNIEXPORT void JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxLoadModel( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return; - } - - auto result = voicevoxCore->voicevox_load_model(speakerId); - - throwExceptionIfError(env, result); -} - -extern "C" -JNIEXPORT jboolean JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxIsModelLoaded( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return false; - } - - auto result = voicevoxCore->voicevox_is_model_loaded(speakerId); - - return result; -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxAudioQuery( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring text, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto textCStr = env->GetStringUTFChars(text, nullptr); - auto options = voicevoxCore->voicevox_make_default_audio_query_options(); - options.kana = false; - - char *result; - - auto resultCode = voicevoxCore->voicevox_audio_query(textCStr, speakerId, options, &result); - env->ReleaseStringUTFChars(text, textCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultJStr = env->NewStringUTF(result); - voicevoxCore->voicevox_audio_query_json_free(result); - - return resultJStr; -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxAccentPhrases( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring text, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto textCStr = env->GetStringUTFChars(text, nullptr); - auto options = voicevoxCore->voicevox_make_default_accent_phrases_options(); - options.kana = false; - - char *result; - - auto resultCode = voicevoxCore->voicevox_accent_phrases(textCStr, speakerId, options, &result); - env->ReleaseStringUTFChars(text, textCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultJStr = env->NewStringUTF(result); - voicevoxCore->voicevox_accent_phrases_json_free(result); - - return resultJStr; -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxMoraLength( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring accentPhrases, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto accentPhrasesCStr = env->GetStringUTFChars(accentPhrases, nullptr); - - char *result; - - auto resultCode = voicevoxCore->voicevox_mora_length(accentPhrasesCStr, speakerId, &result); - env->ReleaseStringUTFChars(accentPhrases, accentPhrasesCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultJStr = env->NewStringUTF(result); - voicevoxCore->voicevox_accent_phrases_json_free(result); - - return resultJStr; -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxMoraPitch( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring accentPhrases, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto accentPhrasesCStr = env->GetStringUTFChars(accentPhrases, nullptr); - - char *result; - - auto resultCode = voicevoxCore->voicevox_mora_pitch(accentPhrasesCStr, speakerId, &result); - env->ReleaseStringUTFChars(accentPhrases, accentPhrasesCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultJStr = env->NewStringUTF(result); - voicevoxCore->voicevox_accent_phrases_json_free(result); - - return resultJStr; -} - -extern "C" -JNIEXPORT jstring JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxMoraData( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring accentPhrases, - jint speakerId -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto accentPhrasesCStr = env->GetStringUTFChars(accentPhrases, nullptr); - - char *result; - - auto resultCode = voicevoxCore->voicevox_mora_data(accentPhrasesCStr, speakerId, &result); - env->ReleaseStringUTFChars(accentPhrases, accentPhrasesCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultJStr = env->NewStringUTF(result); - voicevoxCore->voicevox_accent_phrases_json_free(result); - - return resultJStr; -} - -extern "C" -JNIEXPORT jbyteArray JNICALL -Java_jp_hiroshiba_voicevox_VoicevoxCore_voicevoxSynthesis( - JNIEnv *env, - __attribute__((unused)) jobject thiz, - jstring audioQuery, - jint speakerId, - jboolean enableInterrogativeUpspeak -) { - if (!assertCoreLoaded(env)) { - return nullptr; - } - - auto audioQueryCStr = env->GetStringUTFChars(audioQuery, nullptr); - auto options = voicevoxCore->voicevox_make_default_synthesis_options(); - options.enable_interrogative_upspeak = enableInterrogativeUpspeak; - - uint8_t *result; - uintptr_t resultSize; - - auto resultCode = voicevoxCore->voicevox_synthesis(audioQueryCStr, speakerId, options, &resultSize, &result); - env->ReleaseStringUTFChars(audioQuery, audioQueryCStr); - - if (throwExceptionIfError(env, resultCode)) { - return nullptr; - } - - auto resultByteArray = env->NewByteArray(static_cast(resultSize)); - auto resultByteArrayElements = env->GetByteArrayElements(resultByteArray, nullptr); - memcpy(resultByteArrayElements, result, resultSize); - env->ReleaseByteArrayElements(resultByteArray, resultByteArrayElements, 0); - voicevoxCore->voicevox_wav_free(result); - - return resultByteArray; -} diff --git a/android/app/src/main/java/jp/hiroshiba/voicevox/CorePlugin.kt b/android/app/src/main/java/jp/hiroshiba/voicevox/CorePlugin.kt index a02c56a01a..54184b1fad 100644 --- a/android/app/src/main/java/jp/hiroshiba/voicevox/CorePlugin.kt +++ b/android/app/src/main/java/jp/hiroshiba/voicevox/CorePlugin.kt @@ -7,7 +7,10 @@ import com.getcapacitor.Plugin import com.getcapacitor.PluginCall import com.getcapacitor.PluginMethod import com.getcapacitor.annotation.CapacitorPlugin +import com.google.gson.Gson +import jp.hiroshiba.voicevoxcore.* import java.io.File +import java.io.FileFilter import java.io.FileOutputStream import java.io.IOException import java.util.Base64 @@ -16,34 +19,41 @@ import java.util.zip.ZipInputStream @CapacitorPlugin(name = "VoicevoxCore") class CorePlugin : Plugin() { - var core: VoicevoxCore? = null - override fun load() { - val modelPath: String = try { - extractIfNotFound("model.zip") - } catch (e: IOException) { - throw RuntimeException(e) - } - core = VoicevoxCore(modelPath) - } + lateinit var openJtalk: OpenJtalk + lateinit var synthesizer: Synthesizer + lateinit var voiceModels: List + lateinit var gson: Gson @PluginMethod fun getVersion(call: PluginCall) { val ret = JSObject() - ret.put("value", core!!.voicevoxGetVersion()) + ret.put("value", BuildConfig.VERSION_NAME) call.resolve(ret) } @PluginMethod fun getSupportedDevicesJson(call: PluginCall) { val ret = JSObject() - ret.put("value", core!!.voicevoxGetSupportedDevicesJson()) + // TODO: ハードコードをやめてちゃんと取得する + ret.put("value", "{\"cpu\": true, \"cuda\": false, \"dml\": false}") call.resolve(ret) } @PluginMethod fun getMetasJson(call: PluginCall) { val ret = JSObject() - ret.put("value", core!!.voicevoxGetMetasJson()) + val flatMetas = voiceModels.flatMap { it.metas.asIterable() } + val metas = flatMetas.map { it.speakerUuid }.toSet().map { speakerUuid -> + val baseMetas = flatMetas.filter { it.speakerUuid == speakerUuid } + val styles = baseMetas.flatMap { it.styles.asIterable() } + val mergedMetas = + gson.toJsonTree(baseMetas[0]).asJsonObject + mergedMetas.add("styles", gson.toJsonTree(styles)) + + mergedMetas + } + val metasJson = gson.toJson(metas) + ret.put("value", metasJson) call.resolve(ret) } @@ -56,12 +66,32 @@ class CorePlugin : Plugin() { } catch (e: IOException) { throw RuntimeException(e) } + val modelPath: File = try { + File(extractIfNotFound("model.zip")) + } catch (e: IOException) { + throw RuntimeException(e) + } try { - core!!.voicevoxInitialize( - dictPath, - ) + gson = Gson() + Log.i("CorePlugin", "Initializing OpenJtalk") + openJtalk = OpenJtalk(dictPath) + + Log.i("CorePlugin", "Initializing Synthesizer") + synthesizer = Synthesizer.builder(openJtalk).build() + + Log.i("CorePlugin", "Initializing VoiceModels") + val vvms = modelPath.listFiles(FileFilter { it.name.endsWith(".vvm") }) + if (vvms == null) { + call.reject("Couldn't get vvms") + Log.e("CorePlugin", "Couldn't get vvms") + return + } + voiceModels = vvms.map { + VoiceModel(it.absolutePath) + } + call.resolve() - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @@ -75,9 +105,14 @@ class CorePlugin : Plugin() { } try { - core!!.voicevoxLoadModel(speakerId) + val model = getVoiceModelFromSpeakerId(speakerId) + if (model == null) { + call.reject("Unknown speaker id") + return + } + synthesizer.loadVoiceModel(model) call.resolve() - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @@ -91,11 +126,16 @@ class CorePlugin : Plugin() { } try { - val result = core!!.voicevoxIsModelLoaded(speakerId) + val model = getVoiceModelFromSpeakerId(speakerId) + if (model == null) { + call.reject("Unknown speaker id") + return + } + val result = synthesizer.isLoadedVoiceModel(model.id) val ret = JSObject() ret.put("value", result) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @@ -110,11 +150,11 @@ class CorePlugin : Plugin() { } try { - val audioQuery = core!!.voicevoxAudioQuery(text, speakerId) + val audioQuery = synthesizer.createAudioQuery(text, speakerId) val ret = JSObject() - ret.put("value", audioQuery) + ret.put("value", gson.toJson(audioQuery)) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @@ -129,89 +169,98 @@ class CorePlugin : Plugin() { } try { - val accentPhrases = core!!.voicevoxAccentPhrases(text, speakerId) + val accentPhrases = synthesizer.createAccentPhrases(text, speakerId) val ret = JSObject() - ret.put("value", accentPhrases) + ret.put("value", gson.toJson(accentPhrases)) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @PluginMethod fun moraPitch(call: PluginCall) { - val accentPhrases = call.getString("accentPhrases") + val accentPhrasesJson = call.getString("accentPhrases") val speakerId = call.getInt("speakerId") - if (accentPhrases == null || speakerId == null) { + if (accentPhrasesJson == null || speakerId == null) { call.reject("Type mismatch") return } + val accentPhrases = + gson.fromJson(accentPhrasesJson, Array::class.java).asList() try { - val newAccentPhrases = core!!.voicevoxMoraPitch(accentPhrases, speakerId) + val newAccentPhrases = synthesizer.replaceMoraPitch(accentPhrases, speakerId) val ret = JSObject() - ret.put("value", newAccentPhrases) + ret.put("value", gson.toJson(newAccentPhrases)) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @PluginMethod - fun moraLength(call: PluginCall) { - val accentPhrases = call.getString("accentPhrases") + fun phonemeLength(call: PluginCall) { + val accentPhrasesJson = call.getString("accentPhrases") val speakerId = call.getInt("speakerId") - if (accentPhrases == null || speakerId == null) { + if (accentPhrasesJson == null || speakerId == null) { call.reject("Type mismatch") return } + val accentPhrases = + gson.fromJson(accentPhrasesJson, Array::class.java).asList() try { - val newAccentPhrases = core!!.voicevoxMoraLength(accentPhrases, speakerId) + val newAccentPhrases = synthesizer.replacePhonemeLength(accentPhrases, speakerId) val ret = JSObject() - ret.put("value", newAccentPhrases) + ret.put("value", gson.toJson(newAccentPhrases)) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @PluginMethod fun moraData(call: PluginCall) { - val accentPhrases = call.getString("accentPhrases") + val accentPhrasesJson = call.getString("accentPhrases") val speakerId = call.getInt("speakerId") - if (accentPhrases == null || speakerId == null) { + if (accentPhrasesJson == null || speakerId == null) { call.reject("Type mismatch") return } + val accentPhrases = + gson.fromJson(accentPhrasesJson, Array::class.java).asList() try { - val newAccentPhrases = core!!.voicevoxMoraData(accentPhrases, speakerId) + val newAccentPhrases = synthesizer.replaceMoraData(accentPhrases, speakerId) val ret = JSObject() - ret.put("value", newAccentPhrases) + ret.put("value", gson.toJson(newAccentPhrases)) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @PluginMethod fun synthesis(call: PluginCall) { - val audioQuery = call.getString("audioQuery") + val audioQueryJson = call.getString("audioQuery") val speakerId = call.getInt("speakerId") val enableInterrogativeUpspeak = call.getBoolean("enableInterrogativeUpspeak") - if (audioQuery == null || speakerId == null || enableInterrogativeUpspeak == null) { + if (audioQueryJson == null || speakerId == null || enableInterrogativeUpspeak == null) { call.reject("Type mismatch") return } + val audioQuery = gson.fromJson(audioQueryJson, AudioQuery::class.java) + try { - val result = core!!.voicevoxSynthesis(audioQuery, speakerId, enableInterrogativeUpspeak) + val result = synthesizer.synthesis(audioQuery, speakerId) + .interrogativeUpspeak(enableInterrogativeUpspeak).execute() val ret = JSObject() val encodedResult = Base64.getEncoder().encodeToString(result) ret.put("value", encodedResult) call.resolve(ret) - } catch (e: VoicevoxCore.VoicevoxException) { + } catch (e: VoicevoxException) { call.reject(e.message) } } @@ -236,6 +285,7 @@ class CorePlugin : Plugin() { return destRoot.absolutePath } else if (destHash.exists()) { Log.i("extractIfNotFound", "Outdated (Hashes don't match)") + destRoot.deleteRecursively() } else { Log.i("extractIfNotFound", "Not exists") } @@ -265,4 +315,14 @@ class CorePlugin : Plugin() { Log.i("extractIfNotFound", "Done") return destRoot.absolutePath } + + private fun getVoiceModelFromSpeakerId(speakerId: Int): VoiceModel? { + return voiceModels.find { model -> + model.metas.any { meta -> + meta.styles.any { style -> + style.id == speakerId + } + } + } + } } diff --git a/android/app/src/main/java/jp/hiroshiba/voicevox/VoicevoxCore.kt b/android/app/src/main/java/jp/hiroshiba/voicevox/VoicevoxCore.kt deleted file mode 100644 index 0f42ab7fb8..0000000000 --- a/android/app/src/main/java/jp/hiroshiba/voicevox/VoicevoxCore.kt +++ /dev/null @@ -1,55 +0,0 @@ -package jp.hiroshiba.voicevox - -import android.system.ErrnoException -import android.system.Os - -class VoicevoxCore { - external fun voicevoxGetVersion(): String - external fun voicevoxGetSupportedDevicesJson(): String - external fun voicevoxGetMetasJson(): String - - external fun voicevoxErrorResultToMessage(statusCode: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxInitialize(openJtalkDictPath: String) - - @Throws(VoicevoxException::class) - external fun voicevoxLoadModel(speakerId: Int) - external fun voicevoxIsModelLoaded(speakerId: Int): Boolean - - @Throws(VoicevoxException::class) - external fun voicevoxAudioQuery(text: String, speakerId: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxAccentPhrases(text: String, speakerId: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxMoraPitch(accentPhrases: String, speakerId: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxMoraLength(accentPhrases: String, speakerId: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxMoraData(accentPhrases: String, speakerId: Int): String - - @Throws(VoicevoxException::class) - external fun voicevoxSynthesis(audioQuery: String, speakerId: Int, enableInterrogativeUpspeak: Boolean): ByteArray - - class VoicevoxException(override val message: String) : Exception(message) - - constructor(modelPath: String) { - try { - Os.setenv("VV_MODELS_ROOT_DIR", modelPath, true) - } catch (e: ErrnoException) { - throw RuntimeException(e) - } - loadLibrary() - } - - private external fun loadLibrary() - - init { - // System.loadLibrary("voicevox_core"); - System.loadLibrary("voicevox_core_wrapper") - } -} diff --git a/android/app/src/main/jniLibs/README.md b/android/app/src/main/jniLibs/README.md deleted file mode 100644 index 6241767fa6..0000000000 --- a/android/app/src/main/jniLibs/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# android/app/src/main/jniLibs - -このディレクトリにはそれぞれのアーキテクチャの外部ライブラリが入ります。 -以下のように配置して下さい。 - -```yml -jniLibs: - include: - .gitkeep - voicevox_core.h - x86_64: - .gitkeep - libvoicevox_core.so - arm64-v8a: - .gitkeep - libvoicevox_core.so -``` - -| ライブラリ | ダウンロードリンク | -|------------------------------------------------------------|---------------------------------------------------------------------------| -| [VOICEVOX CORE](https://github.com/voicevox/voicevox_core) | | diff --git a/android/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/app/src/main/jniLibs/arm64-v8a/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/android/app/src/main/jniLibs/include/.gitkeep b/android/app/src/main/jniLibs/include/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/android/app/src/main/jniLibs/x86_64/.gitkeep b/android/app/src/main/jniLibs/x86_64/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/android/app/src/main/res/layout/activity_main.xml b/android/app/src/main/res/layout/activity_main.xml index b5ad138701..aeba3591c1 100644 --- a/android/app/src/main/res/layout/activity_main.xml +++ b/android/app/src/main/res/layout/activity_main.xml @@ -7,6 +7,7 @@ tools:context=".MainActivity"> diff --git a/android/gradle.properties b/android/gradle.properties index 553230f4f0..f738076537 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -9,7 +9,7 @@ # Specifies the JVM arguments used for the daemon process. # The setting is particularly useful for tweaking memory settings. -org.gradle.jvmargs=-Xmx4096M +org.gradle.jvmargs=-Xmx8192M # When configured, Gradle will run in incubating parallel mode. # This option should only be used with decoupled projects. More details, visit diff --git a/android/variables.gradle b/android/variables.gradle index 5946adabdd..7c43d1d478 100644 --- a/android/variables.gradle +++ b/android/variables.gradle @@ -1,5 +1,5 @@ ext { - minSdkVersion = 22 + minSdkVersion = 26 compileSdkVersion = 33 targetSdkVersion = 33 androidxActivityVersion = '1.7.0' diff --git a/build/downloadMobileAssets.mts b/build/downloadMobileAssets.mts index 16076f0644..a516cf7881 100644 --- a/build/downloadMobileAssets.mts +++ b/build/downloadMobileAssets.mts @@ -1,10 +1,12 @@ /** - * voicevox/voicevox_coreのmodelディレクトリをダウンロードしてZip圧縮し、 - * またOpenJTalkの辞書をzip圧縮するスクリプト。 + * スマホ版のためのアセットを準備するスクリプト。 + * - voicevox/voicevox_resourcesのvvm + * - zip圧縮されたOpenJTalkの辞書 */ import path from "path"; import fs from "fs"; import crypto from "crypto"; +import fetch from "node-fetch"; import { runCommand, __dirname } from "./utils.mjs"; let sevenZipCommand: string; @@ -26,8 +28,11 @@ switch (process.platform) { } } const sevenZip = path.resolve(__dirname, "vendored", "7z", sevenZipCommand); +type Release = { + tag_name: string; + assets: { name: string; browser_download_url: string }[]; +}; -// FIXME: ダミーモデルを使っているので製品版に変える const downloadAndCompressModel = async () => { const modelZipPath = path.resolve( __dirname, @@ -40,37 +45,42 @@ const downloadAndCompressModel = async () => { ); return; } - if (fs.existsSync(path.resolve(__dirname, "vendored/voicevox_core"))) { - await runCommand( - "git", - "-C", - __dirname + "/vendored/voicevox_core", - "pull", - "origin", - "main" - ); - } else { - await runCommand( - "git", - "clone", - "https://github.com/VOICEVOX/voicevox_core.git", - __dirname + "/vendored/voicevox_core" - ); + console.log("Downloading model..."); + const releases = await fetch( + "https://api.github.com/repos/voicevox/voicevox_core/releases" + ); + const releasesJson = (await releases.json()) as Release[]; + const latestRelease = releasesJson[0]; + const modelUrl = latestRelease.assets.find((asset) => + asset.name.startsWith("model-") + )?.browser_download_url; + if (!modelUrl) { + throw new Error("Failed to get model url"); } + console.log("Downloading model from " + modelUrl); + const modelPath = path.resolve(__dirname, "vendored", "model.zip"); + + const response = await fetch(modelUrl); + if (!response.ok) { + throw new Error("Failed to download model"); + } + const buffer = await response.arrayBuffer(); + await fs.promises.writeFile(modelPath, Buffer.from(buffer)); + await runCommand( - "git", - "-C", - __dirname + "/vendored/voicevox_core", - "checkout", - "b8c1b316203a0963ce3d3aca787fd392cceba930" + sevenZip, + "x", + modelPath, + "-o" + __dirname + "/vendored", + "-y" ); + await runCommand( sevenZip, "a", "-tzip", modelZipPath, - __dirname + "/vendored/voicevox_core/model/*", - __dirname + "/vendored/voicevox_core/LICENSE" + __dirname + "/vendored/model-" + latestRelease.tag_name + "/*" ); await createFileHash(modelZipPath); @@ -90,9 +100,6 @@ const downloadAndCompressOpenJTalkDict = async () => { return; } - // node-fetchはESModuleなので、import()で読み込む - const { default: fetch } = await import("node-fetch"); - const dictUrl = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1/open_jtalk_dic_utf_8-1.11.tar.gz"; const dictPath = path.resolve( diff --git a/build/generateSpeakerInfo.js b/build/generateSpeakerInfo.js index b57b5c6dbd..3f89f147e8 100644 --- a/build/generateSpeakerInfo.js +++ b/build/generateSpeakerInfo.js @@ -1,6 +1,6 @@ // @ts-check /** - * voicevox/voicevox_engineのモックのSpeakerを使ってspeakerInfos.jsonを生成する + * voicevox/voicevox_resourceのデータからspeakerInfosディレクトリとその中のjsonを生成する */ const path = require("path"); const fs = require("fs"); @@ -11,35 +11,35 @@ const { glob: globBase } = require("glob"); const glob = promisify(globBase); const main = async () => { - const destPath = path.resolve(__dirname, "../public/speakerInfos.json"); + const destPath = path.resolve(__dirname, "../public/speakerInfos"); if (fs.existsSync(destPath)) { - console.log("speakers already exists. skipping conversion."); - return; + fs.rmSync(destPath, { recursive: true, force: true }); } - if (fs.existsSync(path.resolve(__dirname, "vendored/voicevox_engine"))) { + fs.mkdirSync(destPath, { recursive: true }); + if (fs.existsSync(path.resolve(__dirname, "vendored/voicevox_resource"))) { const updater = spawnSync( "git", [ "-C", - __dirname + "/vendored/voicevox_engine", + __dirname + "/vendored/voicevox_resource", "pull", "origin", - "master", + "main", ], { stdio: "inherit", } ); if (updater.status !== 0) { - throw new Error("Failed to update VOICEVOX/voicevox_engine"); + throw new Error("Failed to update VOICEVOX/voicevox_resource"); } } else { const extractor = spawnSync( "git", [ "clone", - "https://github.com/VOICEVOX/voicevox_engine.git", - __dirname + "/vendored/voicevox_engine", + "https://github.com/VOICEVOX/voicevox_resource.git", + __dirname + "/vendored/voicevox_resource", "--depth", "1", ], @@ -48,90 +48,86 @@ const main = async () => { } ); if (extractor.status !== 0) { - throw new Error("Failed to clone VOICEVOX/voicevox_engine"); + throw new Error("Failed to clone VOICEVOX/voicevox_resource"); } } const speakerInfoDir = path.resolve( __dirname, - "vendored/voicevox_engine/speaker_info" + "vendored/voicevox_resource/character_info" ); - const policies = await glob( - path.join(speakerInfoDir, "*", "policy.md").replace(/\\/g, "/") - ); - const portraits = await glob( - path.join(speakerInfoDir, "*", "portrait.png").replace(/\\/g, "/") - ); - const styleIcons = await glob( - path.join(speakerInfoDir, "*", "icons", "*.png").replace(/\\/g, "/") - ); - // https://stackoverflow.com/a/73616013 - const stylePortraits = styleIcons.map((styleIcon) => - styleIcon.replace(/(icons)(?!.*\1)/, "portraits") - ); - const voiceSamples = await glob( - path.join(speakerInfoDir, "*", "voice_samples", "*.wav").replace(/\\/g, "/") - ); + const characters = await fs.promises.readdir(speakerInfoDir); - const metas = JSON.parse( - await fs.promises.readFile( - path.resolve(__dirname, "vendored/voicevox_core/model/metas.json"), + for (const character of characters) { + const characterDir = path.join(speakerInfoDir, character); + const uuid = path.basename(character).split("_")[1]; + const policy = await fs.promises.readFile( + path.join(characterDir, "policy.md"), "utf-8" - ) - ); + ); + const portrait = await fs.promises.readFile( + path.join(characterDir, "portrait.png") + ); - /** @type {string[]} */ - const coreSpeakerUuids = metas.map((meta) => meta.speaker_uuid); + const styleIcons = await fs.promises.readdir( + path.join(characterDir, "icons") + ); + /** @type {string[]} */ + const stylePortraits = await fs.promises + .readdir(path.join(characterDir, "portraits")) + .catch(() => []); - let styleIndex = 0; + const voiceSamples = await glob( + path + .join(speakerInfoDir, "*", "voice_samples", "*.wav") + .replace(/\\/g, "/") + ); + const jsonPath = path.join(destPath, `${uuid}.json`); + console.log(`Generating ${jsonPath}`); + await fs.promises.writeFile( + jsonPath, + JSON.stringify({ + policy: policy, + portrait: portrait.toString("base64"), + + style_infos: await Promise.all( + styleIcons.map(async (style) => { + const id = parseInt(style.split(".")[0]); + const portrait = + (stylePortraits.includes(`${id}.png`) && + (await fs.promises + .readFile(path.join(characterDir, "portraits", `${id}.png`)) + .then((buf) => buf.toString("base64")) + .catch(() => undefined))) || + undefined; - await fs.promises.writeFile( - destPath, - JSON.stringify( - Object.fromEntries( - await Promise.all( - coreSpeakerUuids.map(async (uuid, i) => [ - uuid, - { - policy: await fs.promises.readFile(policies[i], "utf-8"), - portrait: await fs.promises - .readFile(portraits[i]) + const styleDir = path.join(characterDir, "icons", style); + + return { + id, + icon: await fs.promises + .readFile(styleDir) .then((buf) => buf.toString("base64")), - style_infos: await Promise.all( - metas[i].styles.map(async (style) => { - const index = styleIndex++; - return { - id: style.id, - icon: await fs.promises - .readFile(styleIcons[index]) - .then((buf) => buf.toString("base64")), - portrait: await fs.promises - .readFile(stylePortraits[index]) - .then((buf) => buf.toString("base64")) - .catch(() => null), - voice_samples: await Promise.all( - voiceSamples - .filter((voiceSample) => - voiceSample.includes(`/${index}_`) - ) - .map( - async (voiceSample) => - await fs.promises - .readFile(voiceSample) - .then((buf) => buf.toString("base64")) - ) - ), - }; - }) + + portrait, + + voice_samples: await Promise.all( + voiceSamples + .filter((voiceSample) => voiceSample.includes(`/${id}_`)) + .map( + async (voiceSample) => + await fs.promises + .readFile(voiceSample) + .then((buf) => buf.toString("base64")) + ) ), - }, - ]) - ) - ) - ) - ); - console.log("speakerInfos.json generated."); + }; + }) + ), + }) + ); + } }; main(); diff --git a/capacitor.config.ts b/capacitor.config.ts index 2971645780..d1cb12f3fe 100644 --- a/capacitor.config.ts +++ b/capacitor.config.ts @@ -1,5 +1,6 @@ /// import { networkInterfaces } from "os"; +import dotenv from "dotenv"; import { CapacitorConfig } from "@capacitor/cli"; const config: CapacitorConfig = { @@ -14,13 +15,35 @@ const config: CapacitorConfig = { }; if (process.env.CAPACITOR_MODE === "serve") { - const nets = networkInterfaces(); - const net = Object.values(nets)[0]?.find( - (net) => net.family === "IPv4" && !net.internal - ); - if (!net) throw new Error("assert: net != null"); + dotenv.config(); + let address = process.env.CAPACITOR_ADDRESS; + if (!address) { + const nets = networkInterfaces(); + const net = Object.entries(nets) + .flatMap(([name, nets]) => + name.includes("WSL") || + name.includes("VirtualBox") || + name.includes("Loopback") + ? [] + : nets + ) + .find( + (net) => + net && + net.family === "IPv4" && + !net.internal && + (net.address.startsWith("192.168.") || + net.address.startsWith("172.16.") || + net.address.startsWith("10.")) + ); + if (!net) + throw new Error( + "ネットワークを選択できませんでした。.envにCAPACITOR_ADDRESSを設定してください。" + ); + address = net.address; + } config.server = { - url: `http://${net.address}:5173`, + url: `http://${address}:5173`, cleartext: true, }; } diff --git a/package-lock.json b/package-lock.json index d5c53cff6f..de51fd178a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,8 +9,8 @@ "version": "999.999.999", "hasInstallScript": true, "dependencies": { - "@capacitor/android": "5.0.3", - "@capacitor/core": "5.0.3", + "@capacitor/android": "5.5.1", + "@capacitor/core": "5.5.1", "@capacitor/splash-screen": "5.0.2", "@gtm-support/vue-gtm": "1.2.3", "@quasar/extras": "1.10.10", @@ -45,7 +45,7 @@ "zod-to-json-schema": "3.20.1" }, "devDependencies": { - "@capacitor/cli": "5.0.3", + "@capacitor/cli": "5.5.1", "@openapitools/openapi-generator-cli": "2.3.3", "@playwright/test": "1.32.1", "@quasar/vite-plugin": "1.3.0", @@ -149,17 +149,17 @@ } }, "node_modules/@capacitor/android": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/android/-/android-5.0.3.tgz", - "integrity": "sha512-jPN6JJUbN2cCrAjP+0Q4gIwIxleFVChZWh+/gsbDDfSpogdpZNwZnriUNxSRTZayL58tUzLHuxpJxY6hsddDrA==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/android/-/android-5.5.1.tgz", + "integrity": "sha512-WTnPnpaEvTtaEtTNRbh06Y1afF7A4plY/4uajAL0WW8tdR1FxieadF357yKGiAT6CudI/B+eOu6rxn6qWuphKg==", "peerDependencies": { - "@capacitor/core": "^5.0.0" + "@capacitor/core": "^5.5.0" } }, "node_modules/@capacitor/cli": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/cli/-/cli-5.0.3.tgz", - "integrity": "sha512-VPmd/uKSCYqeLgNTrG2wVaMoCX8lo2UuXFVwP54W4nXPyG6LsDuOKYytkBItTAJrI3KEXdZWyRFbXjIY48/m6g==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/cli/-/cli-5.5.1.tgz", + "integrity": "sha512-/oGd2IIc+k1H/fc7tUzP7vqMtZi0gNcJ4/4wUE2kzAnETxxxHXMM/2V62KfjCby/OOAzJbtI7n5OPlnWE9un1A==", "dev": true, "dependencies": { "@ionic/cli-framework-output": "^2.2.5", @@ -170,7 +170,7 @@ "debug": "^4.3.4", "env-paths": "^2.2.0", "kleur": "^4.1.4", - "native-run": "^1.7.2", + "native-run": "^1.7.3", "open": "^8.4.0", "plist": "^3.0.5", "prompts": "^2.4.2", @@ -306,9 +306,9 @@ "dev": true }, "node_modules/@capacitor/core": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/core/-/core-5.0.3.tgz", - "integrity": "sha512-VxzSoFvoiGDmvlKQMbg7ZZh+w3Z223630kZINDOsbs7UZr5gZws7+m26Y9Oc8n2miI737hE6qOf2VU4H/8Tytg==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/core/-/core-5.5.1.tgz", + "integrity": "sha512-VG6Iv8Q7ZAbvjodxpvjcSe0jfxUwZXnvjbi93ehuJ6eYP8U926qLSXyrT/DToZq+F6v/HyGyVgn3mrE/9jW2Tg==", "dependencies": { "tslib": "^2.1.0" } @@ -11951,9 +11951,9 @@ } }, "node_modules/native-run": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/native-run/-/native-run-1.7.2.tgz", - "integrity": "sha512-2aahC8iXIO8BcvEukVMrYwL5sXurkuIGyQgfSGBto832W6ejV+cB5Ww+2/CRxmyozhbxARJ2OMpEGPV8sTqsrQ==", + "version": "1.7.4", + "resolved": "https://registry.npmjs.org/native-run/-/native-run-1.7.4.tgz", + "integrity": "sha512-yDEwTp66vmXpqFiSQzz4sVQgyq5U58gGRovglY4GHh12ITyWa6mh6Lbpm2gViVOVD1JYFtYnwcgr7GTFBinXNA==", "dev": true, "dependencies": { "@ionic/utils-fs": "^3.1.6", @@ -12008,9 +12008,9 @@ } }, "node_modules/native-run/node_modules/tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==", "dev": true }, "node_modules/natural-compare": { @@ -17411,15 +17411,15 @@ "integrity": "sha512-74bEXKX2h+8rrfQUfsBfuZZHzsEs6Eql4pqy/T4Nn6Y9wNPggQOqD6z6pn5Bl8ZfysKouFZT/UXEH94ummEeQw==" }, "@capacitor/android": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/android/-/android-5.0.3.tgz", - "integrity": "sha512-jPN6JJUbN2cCrAjP+0Q4gIwIxleFVChZWh+/gsbDDfSpogdpZNwZnriUNxSRTZayL58tUzLHuxpJxY6hsddDrA==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/android/-/android-5.5.1.tgz", + "integrity": "sha512-WTnPnpaEvTtaEtTNRbh06Y1afF7A4plY/4uajAL0WW8tdR1FxieadF357yKGiAT6CudI/B+eOu6rxn6qWuphKg==", "requires": {} }, "@capacitor/cli": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/cli/-/cli-5.0.3.tgz", - "integrity": "sha512-VPmd/uKSCYqeLgNTrG2wVaMoCX8lo2UuXFVwP54W4nXPyG6LsDuOKYytkBItTAJrI3KEXdZWyRFbXjIY48/m6g==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/cli/-/cli-5.5.1.tgz", + "integrity": "sha512-/oGd2IIc+k1H/fc7tUzP7vqMtZi0gNcJ4/4wUE2kzAnETxxxHXMM/2V62KfjCby/OOAzJbtI7n5OPlnWE9un1A==", "dev": true, "requires": { "@ionic/cli-framework-output": "^2.2.5", @@ -17430,7 +17430,7 @@ "debug": "^4.3.4", "env-paths": "^2.2.0", "kleur": "^4.1.4", - "native-run": "^1.7.2", + "native-run": "^1.7.3", "open": "^8.4.0", "plist": "^3.0.5", "prompts": "^2.4.2", @@ -17525,9 +17525,9 @@ } }, "@capacitor/core": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@capacitor/core/-/core-5.0.3.tgz", - "integrity": "sha512-VxzSoFvoiGDmvlKQMbg7ZZh+w3Z223630kZINDOsbs7UZr5gZws7+m26Y9Oc8n2miI737hE6qOf2VU4H/8Tytg==", + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/@capacitor/core/-/core-5.5.1.tgz", + "integrity": "sha512-VG6Iv8Q7ZAbvjodxpvjcSe0jfxUwZXnvjbi93ehuJ6eYP8U926qLSXyrT/DToZq+F6v/HyGyVgn3mrE/9jW2Tg==", "requires": { "tslib": "^2.1.0" }, @@ -26797,9 +26797,9 @@ } }, "native-run": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/native-run/-/native-run-1.7.2.tgz", - "integrity": "sha512-2aahC8iXIO8BcvEukVMrYwL5sXurkuIGyQgfSGBto832W6ejV+cB5Ww+2/CRxmyozhbxARJ2OMpEGPV8sTqsrQ==", + "version": "1.7.4", + "resolved": "https://registry.npmjs.org/native-run/-/native-run-1.7.4.tgz", + "integrity": "sha512-yDEwTp66vmXpqFiSQzz4sVQgyq5U58gGRovglY4GHh12ITyWa6mh6Lbpm2gViVOVD1JYFtYnwcgr7GTFBinXNA==", "dev": true, "requires": { "@ionic/utils-fs": "^3.1.6", @@ -26842,9 +26842,9 @@ } }, "tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==", + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==", "dev": true } } diff --git a/package.json b/package.json index cb9500c493..f30f3646da 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "test-watch:browser-e2e": "cross-env PWTEST_WATCH=1 VITE_TARGET=browser playwright test --reporter=html", "lint": "eslint --ext .js,.vue,.ts *.config.* src tests build", "fmt": "eslint --ext .js,.vue,.ts *.config.* src tests build --fix", - "markdownlint": "markdownlint --ignore node_modules/ --ignore dist/ --ignore dist_electron/ --ignore build/vendored/voicevox_engine --ignore build/vendored/voicevox_core ./", + "markdownlint": "markdownlint --ignore node_modules/ --ignore dist/ --ignore dist_electron/ --ignore build/vendored ./", "typecheck": "tsc --noEmit", "electron:build": "cross-env VITE_TARGET=electron vite build && electron-builder --config electron-builder.config.js", "electron:build_dir": "cross-env VITE_TARGET=electron vite build && electron-builder --config electron-builder.config.js --dir", @@ -42,8 +42,8 @@ "cap:open:ios": "npx cap open ios" }, "dependencies": { - "@capacitor/android": "5.0.3", - "@capacitor/core": "5.0.3", + "@capacitor/android": "5.5.1", + "@capacitor/core": "5.5.1", "@capacitor/splash-screen": "5.0.2", "@gtm-support/vue-gtm": "1.2.3", "@quasar/extras": "1.10.10", @@ -81,7 +81,7 @@ "dmg-license": "1.0.11" }, "devDependencies": { - "@capacitor/cli": "5.0.3", + "@capacitor/cli": "5.5.1", "@openapitools/openapi-generator-cli": "2.3.3", "@playwright/test": "1.32.1", "@quasar/vite-plugin": "1.3.0", diff --git a/src/mobile/engine/query.ts b/src/mobile/engine/query.ts index 89a60b6c15..7b44c85eff 100644 --- a/src/mobile/engine/query.ts +++ b/src/mobile/engine/query.ts @@ -1,21 +1,10 @@ import { ApiProvider } from "."; import { - AccentPhrase, AccentPhraseFromJSON, AccentPhraseToJSON, AudioQueryFromJSON, } from "@/openapi"; -// TODO: https://github.com/VOICEVOX/voicevox_core/pull/486 がマージされたら消す -const accentPhrasesToJSON = (accentPhrases: AccentPhrase[]) => - accentPhrases.map((p) => { - const ret = AccentPhraseToJSON(p); - if (ret.is_interrogative == null) { - ret.is_interrogative = false; - } - return ret; - }); - const queryProvider: ApiProvider = ({ corePlugin }) => { return { async audioQueryAudioQueryPost({ text, speaker }) { @@ -45,8 +34,8 @@ const queryProvider: ApiProvider = ({ corePlugin }) => { async moraLengthMoraLengthPost({ accentPhrase: accentPhrases, speaker }) { const rawMoraLength = await corePlugin - .moraLength({ - accentPhrases: JSON.stringify(accentPhrasesToJSON(accentPhrases)), + .phonemeLength({ + accentPhrases: JSON.stringify(accentPhrases.map(AccentPhraseToJSON)), speakerId: speaker, }) .then((res) => JSON.parse(res.value)); @@ -56,7 +45,7 @@ const queryProvider: ApiProvider = ({ corePlugin }) => { async moraPitchMoraPitchPost({ accentPhrase: accentPhrases, speaker }) { const rawMoraPitch = await corePlugin .moraPitch({ - accentPhrases: JSON.stringify(accentPhrasesToJSON(accentPhrases)), + accentPhrases: JSON.stringify(accentPhrases.map(AccentPhraseToJSON)), speakerId: speaker, }) .then((res) => JSON.parse(res.value)); @@ -66,7 +55,7 @@ const queryProvider: ApiProvider = ({ corePlugin }) => { async moraDataMoraDataPost({ accentPhrase: accentPhrases, speaker }) { const rawMoraData = await corePlugin .moraData({ - accentPhrases: JSON.stringify(accentPhrasesToJSON(accentPhrases)), + accentPhrases: JSON.stringify(accentPhrases.map(AccentPhraseToJSON)), speakerId: speaker, }) .then((res) => JSON.parse(res.value)); @@ -81,7 +70,7 @@ const queryProvider: ApiProvider = ({ corePlugin }) => { const b64Audio = await corePlugin .synthesis({ audioQuery: JSON.stringify({ - accent_phrases: accentPhrasesToJSON(audioQuery.accentPhrases), + accent_phrases: audioQuery.accentPhrases.map(AccentPhraseToJSON), speed_scale: audioQuery.speedScale, pitch_scale: audioQuery.pitchScale, intonation_scale: audioQuery.intonationScale, diff --git a/src/mobile/engine/speaker.ts b/src/mobile/engine/speaker.ts index 2534daf7c5..5ac033f056 100644 --- a/src/mobile/engine/speaker.ts +++ b/src/mobile/engine/speaker.ts @@ -2,7 +2,7 @@ import { ApiProvider } from "."; import { SpeakerFromJSON, SpeakerInfo, SpeakerInfoFromJSON } from "@/openapi"; const speakerProvider: ApiProvider = ({ corePlugin }) => { - let speakerInfosMap: Record | undefined; + const speakerInfosMap: Record = {}; return { async speakersSpeakersGet() { @@ -21,12 +21,17 @@ const speakerProvider: ApiProvider = ({ corePlugin }) => { ); }, async speakerInfoSpeakerInfoGet({ speakerUuid }) { - if (!speakerInfosMap) { - speakerInfosMap = Object.fromEntries( - Object.entries( - await fetch("/speakerInfos.json").then((res) => res.json()) - ).map(([key, value]) => [key, SpeakerInfoFromJSON(value)]) - ); + if (!speakerInfosMap[speakerUuid]) { + const speakerInfo = await fetch(`/speakerInfos/${speakerUuid}.json`) + .then((res) => { + if (res.ok) { + return res.json(); + } else { + throw new Error(`SpeakerInfo not found: ${speakerUuid}`); + } + }) + .then(SpeakerInfoFromJSON); + speakerInfosMap[speakerUuid] = speakerInfo; } const speakerInfo = speakerInfosMap[speakerUuid]; if (!speakerInfo) { diff --git a/src/mobile/plugin.ts b/src/mobile/plugin.ts index d7174c08f4..103bf0fcd6 100644 --- a/src/mobile/plugin.ts +++ b/src/mobile/plugin.ts @@ -19,7 +19,7 @@ export type VoicevoxCorePlugin = { speakerId: number; }) => Promise<{ value: string }>; - moraLength: (obj: { + phonemeLength: (obj: { accentPhrases: string; speakerId: number; }) => Promise<{ value: string }>;