diff --git a/.github/scripts/node-addon/package-optional.json b/.github/scripts/node-addon/package-optional.json
index b3c71f9dad..d2db2e1920 100644
--- a/.github/scripts/node-addon/package-optional.json
+++ b/.github/scripts/node-addon/package-optional.json
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-PLATFORM2-ARCH",
"version": "SHERPA_ONNX_VERSION",
- "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+ "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
+ "locally",
+ "local",
"embedded systems",
"open source",
+ "diarization",
+ "speaker diarization",
+ "speaker recognition",
+ "speaker",
+ "speaker segmentation",
+ "speaker verification",
+ "spoken language identification",
+ "sherpa",
"zipformer",
"asr",
"tts",
@@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
- "vad",
- "speaker id",
- "language id",
- "node-addon-api",
"streaming speech recognition",
"speech",
- "recognition"
+ "recognition",
+ "vad",
+ "node-addon-api",
+ "speaker id",
+ "language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
diff --git a/.github/scripts/node-addon/package.json b/.github/scripts/node-addon/package.json
index 0444552fc3..bc2d89e89c 100644
--- a/.github/scripts/node-addon/package.json
+++ b/.github/scripts/node-addon/package.json
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-node",
"version": "SHERPA_ONNX_VERSION",
- "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+ "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "sherpa-onnx.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
+ "locally",
+ "local",
"embedded systems",
"open source",
+ "diarization",
+ "speaker diarization",
+ "speaker recognition",
+ "speaker",
+ "speaker segmentation",
+ "speaker verification",
+ "spoken language identification",
+ "sherpa",
"zipformer",
"asr",
"tts",
@@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
- "vad",
- "speaker id",
- "language id",
- "node-addon-api",
"streaming speech recognition",
"speech",
- "recognition"
+ "recognition",
+ "vad",
+ "node-addon-api",
+ "speaker id",
+ "language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
diff --git a/.github/scripts/test-cxx-api.sh b/.github/scripts/test-cxx-api.sh
new file mode 100755
index 0000000000..aedf161337
--- /dev/null
+++ b/.github/scripts/test-cxx-api.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE"
+echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE"
+echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE"
+echo "PATH: $PATH"
+
+log "------------------------------------------------------------"
+log "Test streaming zipformer CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+$CXX_STREAMING_ZIPFORMER_EXE
+rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+
+log "------------------------------------------------------------"
+log "Test Whisper CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+$CXX_WHISPER_EXE
+rm -rf sherpa-onnx-whisper-tiny.en
+
+log "------------------------------------------------------------"
+log "Test SenseVoice CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+$CXX_SENSE_VOICE_EXE
+rm -rf sherpa-onnx-sense-voice-*
diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh
index 0aff2085e7..27199ae9f7 100755
--- a/.github/scripts/test-dart.sh
+++ b/.github/scripts/test-dart.sh
@@ -4,6 +4,38 @@ set -ex
cd dart-api-examples
+pushd tts
+
+echo '----------kokoro and matcha tts----------'
+./run-kokoro-zh-en.sh
+./run-kokoro-en.sh
+./run-matcha-zh.sh
+./run-matcha-en.sh
+ls -lh *.wav
+rm -rf matcha-icefall-*
+rm *.onnx
+
+echo '----------piper tts----------'
+./run-piper.sh
+rm -rf vits-piper-*
+
+echo '----------coqui tts----------'
+./run-coqui.sh
+rm -rf vits-coqui-*
+
+echo '----------zh tts----------'
+./run-vits-zh.sh
+rm -rf sherpa-onnx-*
+
+ls -lh *.wav
+
+popd # tts
+
+pushd speaker-diarization
+echo '----------speaker diarization----------'
+./run.sh
+popd
+
pushd speaker-identification
echo '----------3d speaker----------'
./run-3d-speaker.sh
@@ -31,6 +63,10 @@ echo "----zipformer transducer----"
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
+echo "----moonshine----"
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
echo "----whisper----"
./run-whisper.sh
rm -rf sherpa-onnx-*
@@ -72,6 +108,10 @@ echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*
+echo '----------moonshine----------'
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*
@@ -93,22 +133,6 @@ rm -rf sherpa-onnx-*
popd # non-streaming-asr
-pushd tts
-
-echo '----------piper tts----------'
-./run-piper.sh
-rm -rf vits-piper-*
-
-echo '----------coqui tts----------'
-./run-coqui.sh
-rm -rf vits-coqui-*
-
-echo '----------zh tts----------'
-./run-zh.sh
-rm -rf sherpa-onnx-*
-
-popd # tts
-
pushd streaming-asr
echo '----------streaming zipformer ctc HLG----------'
diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
index c397fc0cdf..aa41ad985d 100755
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -2,7 +2,41 @@
cd dotnet-examples/
-cd ./offline-decode-files
+cd ./kokoro-tts
+./run-kokoro.sh
+ls -lh
+
+cd ../offline-tts
+./run-matcha-zh.sh
+ls -lh *.wav
+./run-matcha-en.sh
+ls -lh *.wav
+./run-aishell3.sh
+ls -lh *.wav
+./run-piper.sh
+ls -lh *.wav
+./run-hf-fanchen.sh
+ls -lh *.wav
+ls -lh
+
+pushd ../..
+
+mkdir tts
+
+cp -v dotnet-examples/kokoro-tts/*.wav ./tts
+cp -v dotnet-examples/offline-tts/*.wav ./tts
+popd
+
+cd ../offline-speaker-diarization
+./run.sh
+rm -rfv *.onnx
+rm -fv *.wav
+rm -rfv sherpa-onnx-pyannote-*
+
+cd ../offline-decode-files
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*
@@ -67,14 +101,4 @@ cd ../spoken-language-identification
./run.sh
rm -rf sherpa-onnx-*
-cd ../offline-tts
-./run-aishell3.sh
-./run-piper.sh
-./run-hf-fanchen.sh
-ls -lh
-
-cd ../..
-
-mkdir tts
-cp dotnet-examples/offline-tts/*.wav ./tts
diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh
index a46e2de8ed..53db04d739 100755
--- a/.github/scripts/test-nodejs-addon-npm.sh
+++ b/.github/scripts/test-nodejs-addon-npm.sh
@@ -10,7 +10,34 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")
-echo "----------non-streaming asr + vad----------"
+echo "----------non-streaming asr moonshine + vad----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test_vad_with_non_streaming_asr_moonshine.js
+rm -rf sherpa-onnx-*
+rm *.wav
+rm *.onnx
+
+echo "----------non-streaming speaker diarization----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test_offline_speaker_diarization.js
+
+rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
+
+echo "----------non-streaming asr whisper + vad----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -58,6 +85,41 @@ fi
echo "----------tts----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_zh_en.js
+ls -lh *.wav
+rm -rf kokoro-multi-lang-v1_0
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_en.js
+ls -lh *.wav
+rm -rf kokoro-en-v0_19
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test_tts_non_streaming_matcha_icefall_en.js
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test_tts_non_streaming_matcha_icefall_zh.js
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-zh-baker
+ls -lh *.wav
+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2
@@ -204,6 +266,11 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test_asr_non_streaming_whisper.js
rm -rf sherpa-onnx-whisper-tiny.en
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+node ./test_asr_non_streaming_moonshine.js
+rm -rf sherpa-onnx-*
ls -lh
diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh
index c41a0de658..536310af7b 100755
--- a/.github/scripts/test-nodejs-npm.sh
+++ b/.github/scripts/test-nodejs-npm.sh
@@ -9,6 +9,94 @@ git status
ls -lh
ls -lh node_modules
+# offline tts
+#
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+node ./test-offline-tts-kokoro-zh-en.js
+ls -lh *.wav
+rm -rf kokoro-multi-lang-v1_0
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test-offline-tts-kokoro-en.js
+rm -rf kokoro-en-v0_19
+
+ls -lh
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-zh.js
+
+rm -rf matcha-icefall-zh-baker
+rm hifigan_v2.onnx
+
+echo "---"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-en.js
+
+rm -rf matcha-icefall-en_US-ljspeech
+rm hifigan_v2.onnx
+
+echo "---"
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
+tar xf vits-piper-en_US-amy-low.tar.bz2
+node ./test-offline-tts-vits-en.js
+rm -rf vits-piper-en_US-amy-low*
+
+echo "---"
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
+tar xvf vits-icefall-zh-aishell3.tar.bz2
+node ./test-offline-tts-vits-zh.js
+rm -rf vits-icefall-zh-aishell3*
+
+ls -lh *.wav
+
+echo '-----speaker diarization----------'
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test-offline-speaker-diarization.js
+rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*
+
+echo '-----vad+moonshine----------'
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+node ./test-vad-with-non-streaming-asr-whisper.js
+rm Obama.wav
+rm silero_vad.onnx
+rm -rf sherpa-onnx-moonshine-*
+
echo '-----vad+whisper----------'
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -78,6 +166,13 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
rm -rf sherpa-onnx-whisper-tiny.en
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test-offline-moonshine.js
+rm -rf sherpa-onnx-moonshine-*
+
# online asr
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
@@ -111,15 +206,3 @@ tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
-
-# offline tts
-
-curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
-tar xf vits-piper-en_US-amy-low.tar.bz2
-node ./test-offline-tts-en.js
-rm -rf vits-piper-en_US-amy-low*
-
-curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
-tar xvf vits-icefall-zh-aishell3.tar.bz2
-node ./test-offline-tts-zh.js
-rm -rf vits-icefall-zh-aishell3*
diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh
index 57208e9da2..f85b585398 100755
--- a/.github/scripts/test-offline-ctc.sh
+++ b/.github/scripts/test-offline-ctc.sh
@@ -15,6 +15,21 @@ echo "PATH: $PATH"
which $EXE
+log "------------------------------------------------------------"
+log "Run NeMo GigaAM Russian models"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+tar xvf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+rm sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+
+$EXE \
+ --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \
+ --tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt \
+ --debug=1 \
+ ./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav
+
+rm -rf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24
+
log "------------------------------------------------------------"
log "Run SenseVoice models"
log "------------------------------------------------------------"
diff --git a/.github/scripts/test-offline-moonshine.sh b/.github/scripts/test-offline-moonshine.sh
new file mode 100755
index 0000000000..1768e82ecd
--- /dev/null
+++ b/.github/scripts/test-offline-moonshine.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+set -e
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+export GIT_CLONE_PROTECTION_ACTIVE=false
+
+echo "EXE is $EXE"
+echo "PATH: $PATH"
+
+which $EXE
+
+names=(
+tiny
+base
+)
+
+for name in ${names[@]}; do
+ log "------------------------------------------------------------"
+ log "Run $name"
+ log "------------------------------------------------------------"
+
+ repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ curl -SL -O $repo_url
+ tar xvf sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ repo=sherpa-onnx-moonshine-$name-en-int8
+ log "Start testing ${repo_url}"
+
+ log "test int8 onnx"
+
+ time $EXE \
+ --moonshine-preprocessor=$repo/preprocess.onnx \
+ --moonshine-encoder=$repo/encode.int8.onnx \
+ --moonshine-uncached-decoder=$repo/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=$repo/cached_decode.int8.onnx \
+ --tokens=$repo/tokens.txt \
+ --num-threads=2 \
+ $repo/test_wavs/0.wav \
+ $repo/test_wavs/1.wav \
+ $repo/test_wavs/8k.wav
+
+ rm -rf $repo
+done
diff --git a/.github/scripts/test-offline-tts.sh b/.github/scripts/test-offline-tts.sh
index d3d35df2cb..baa2b37bb9 100755
--- a/.github/scripts/test-offline-tts.sh
+++ b/.github/scripts/test-offline-tts.sh
@@ -18,6 +18,87 @@ which $EXE
# test waves are saved in ./tts
mkdir ./tts
+log "------------------------------------------------------------"
+log "kokoro-en-v0_19"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+# mapping of sid to voice name
+# 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+# 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+
+for sid in $(seq 0 10); do
+ $EXE \
+ --debug=1 \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --num-threads=2 \
+ --sid=$sid \
+ --output-filename="./tts/kokoro-$sid.wav" \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar."
+done
+rm -rf kokoro-en-v0_19
+
+log "------------------------------------------------------------"
+log "matcha-icefall-en_US-ljspeech"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --num-threads=2 \
+ --output-filename=./tts/matcha-ljspeech-1.wav \
+ --debug=1 \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+log "------------------------------------------------------------"
+log "matcha-icefall-zh-baker"
+log "------------------------------------------------------------"
+curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --num-threads=2 \
+ --debug=1 \
+ --output-filename=./tts/matcha-baker-zh-1.wav \
+ '小米的使命是,始终坚持做"感动人心、价格厚道"的好产品,让全球每个人都能享受科技带来的美好生活'
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --num-threads=2 \
+ --debug=1 \
+ --output-filename=./tts/matcha-baker-zh-2.wav \
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-zh-baker
+
log "------------------------------------------------------------"
log "vits-piper-en_US-amy-low"
log "------------------------------------------------------------"
diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh
index de7297f2c3..dd4da51207 100755
--- a/.github/scripts/test-python.sh
+++ b/.github/scripts/test-python.sh
@@ -8,6 +8,52 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
+log "test offline zipformer (byte-level bpe, Chinese+English)"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+
+repo=sherpa-onnx-zipformer-zh-en-2023-11-22
+
+./python-api-examples/offline-decode-files.py \
+ --tokens=$repo/tokens.txt \
+ --encoder=$repo/encoder-epoch-34-avg-19.int8.onnx \
+ --decoder=$repo/decoder-epoch-34-avg-19.onnx \
+ --joiner=$repo/joiner-epoch-34-avg-19.int8.onnx \
+ --num-threads=2 \
+ --decoding-method=greedy_search \
+ --debug=true \
+ $repo/test_wavs/0.wav \
+ $repo/test_wavs/1.wav \
+ $repo/test_wavs/2.wav
+
+rm -rf sherpa-onnx-zipformer-zh-en-2023-11-22
+
+log "test offline Moonshine"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+python3 ./python-api-examples/offline-moonshine-decode-files.py
+
+rm -rf sherpa-onnx-moonshine-tiny-en-int8
+
+log "test offline speaker diarization"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+python3 ./python-api-examples/offline-speaker-diarization.py
+
+rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0
+
+
log "test_clustering"
pushd /tmp/
mkdir test-cluster
@@ -221,6 +267,87 @@ log "Offline TTS test"
# test waves are saved in ./tts
mkdir ./tts
+log "kokoro-multi-lang-v1_0 test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+python3 ./python-api-examples/offline-tts.py \
+ --debug=1 \
+ --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
+ --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
+ --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
+ --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \
+ --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --num-threads=2 \
+ --sid=18 \
+ --output-filename="./tts/kokoro-18-zh-en.wav" \
+ "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
+
+rm -rf kokoro-multi-lang-v1_0
+
+log "kokoro-en-v0_19 test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+python3 ./python-api-examples/offline-tts.py \
+ --debug=1 \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --num-threads=2 \
+ --sid=10 \
+ --output-filename="./tts/kokoro-10.wav" \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar."
+
+rm -rf kokoro-en-v0_19
+
+log "matcha-ljspeech-en test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+python3 ./python-api-examples/offline-tts.py \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --output-filename=./tts/test-matcha-ljspeech-en.wav \
+ --num-threads=2 \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+log "matcha-baker-zh test"
+
+curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+python3 ./python-api-examples/offline-tts.py \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --output-filename=./tts/test-matcha-baker-zh.wav \
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
+
+rm -rf matcha-icefall-zh-baker
+rm hifigan_v2.onnx
+
log "vits-ljs test"
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx
@@ -468,53 +595,19 @@ echo "sherpa_onnx version: $sherpa_onnx_version"
pwd
ls -lh
-repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
-log "Start testing ${repo}"
-
-pushd $dir
-curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-tar xf sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-rm sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-popd
-
-repo=$dir/$repo
-ls -lh $repo
-
-python3 ./python-api-examples/keyword-spotter.py \
- --tokens=$repo/tokens.txt \
- --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
- --keywords-file=$repo/test_wavs/test_keywords.txt \
- $repo/test_wavs/0.wav \
- $repo/test_wavs/1.wav
-
-rm -rf $repo
-
if [[ x$OS != x'windows-latest' ]]; then
echo "OS: $OS"
repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
log "Start testing ${repo}"
- pushd $dir
curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
- popd
- repo=$dir/$repo
ls -lh $repo
- python3 ./python-api-examples/keyword-spotter.py \
- --tokens=$repo/tokens.txt \
- --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
- --keywords-file=$repo/test_wavs/test_keywords.txt \
- $repo/test_wavs/3.wav \
- $repo/test_wavs/4.wav \
- $repo/test_wavs/5.wav
+ python3 ./python-api-examples/keyword-spotter.py
python3 sherpa-onnx/python/tests/test_keyword_spotter.py --verbose
diff --git a/.github/scripts/test-speaker-diarization.sh b/.github/scripts/test-speaker-diarization.sh
new file mode 100755
index 0000000000..6d7b2effd0
--- /dev/null
+++ b/.github/scripts/test-speaker-diarization.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+echo "EXE is $EXE"
+echo "PATH: $PATH"
+
+which $EXE
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+log "specify number of clusters"
+$EXE \
+ --clustering.num-clusters=4 \
+ --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+ --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+ ./0-four-speakers-zh.wav
+
+log "specify threshold for clustering"
+
+$EXE \
+ --clustering.cluster-threshold=0.90 \
+ --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+ --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+ ./0-four-speakers-zh.wav
+
+rm -rf sherpa-onnx-pyannote-*
+rm -fv *.onnx
+rm -fv *.wav
diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
index 18c9bed418..65fe4588aa 100755
--- a/.github/scripts/test-swift.sh
+++ b/.github/scripts/test-swift.sh
@@ -7,6 +7,31 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
+./run-tts-vits.sh
+ls -lh
+rm -rf vits-piper-*
+
+./run-tts-kokoro-zh-en.sh
+ls -lh
+rm -rf kokoro-multi-*
+
+./run-tts-kokoro-en.sh
+ls -lh
+rm -rf kokoro-en-*
+
+./run-tts-matcha-zh.sh
+ls -lh
+rm -rf matcha-icefall-*
+
+./run-tts-matcha-en.sh
+ls -lh
+rm -rf matcha-icefall-*
+
+./run-speaker-diarization.sh
+rm -rf *.onnx
+rm -rf sherpa-onnx-pyannote-segmentation-3-0
+rm -fv *.wav
+
./run-add-punctuations.sh
rm ./add-punctuations
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
@@ -33,8 +58,9 @@ popd
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt
-./run-tts.sh
-ls -lh
+rm -rf sherpa-onnx-whisper*
+rm -f *.onnx
+rm /Users/fangjun/Desktop/Obama.wav
./run-decode-file.sh
rm decode-file
@@ -43,5 +69,4 @@ sed -i.bak '20d' ./decode-file.swift
./run-decode-file-non-streaming.sh
-
ls -lh
diff --git a/.github/workflows/aarch64-linux-gnu-shared.yaml b/.github/workflows/aarch64-linux-gnu-shared.yaml
index 5e82d9b3ad..1851645251 100644
--- a/.github/workflows/aarch64-linux-gnu-shared.yaml
+++ b/.github/workflows/aarch64-linux-gnu-shared.yaml
@@ -9,7 +9,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/aarch64-linux-gnu-shared.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/aarch64-linux-gnu-shared.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -34,11 +32,20 @@ concurrency:
jobs:
aarch64_linux_gnu_shared:
runs-on: ${{ matrix.os }}
- name: aarch64 shared lib test
+ name: aarch64 shared GPU ${{ matrix.gpu }} ${{ matrix.onnxruntime_version }}
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
+ include:
+ - os: ubuntu-latest
+ gpu: ON
+ onnxruntime_version: "1.11.0"
+ - os: ubuntu-latest
+ gpu: ON
+ onnxruntime_version: "1.16.0"
+ - os: ubuntu-latest
+ gpu: OFF
+ onnxruntime_version: ""
steps:
- uses: actions/checkout@v4
@@ -61,7 +68,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+          sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -79,15 +86,24 @@ jobs:
make -j2
make install
- - name: cache-toolchain
- id: cache-toolchain
+ - name: cache-toolchain (CPU)
+ if: matrix.gpu == 'OFF'
+ id: cache-toolchain-cpu
uses: actions/cache@v4
with:
path: toolchain
key: gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz
- - name: Download toolchain
- if: steps.cache-toolchain.outputs.cache-hit != 'true'
+ - name: cache-toolchain (GPU)
+ if: matrix.gpu == 'ON'
+ id: cache-toolchain-gpu
+ uses: actions/cache@v4
+ with:
+ path: toolchain
+ key: gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz
+
+ - name: Download toolchain (CPU, gcc 7.5)
+ if: steps.cache-toolchain-cpu.outputs.cache-hit != 'true' && matrix.gpu == 'OFF'
shell: bash
run: |
wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz
@@ -95,6 +111,15 @@ jobs:
mkdir $GITHUB_WORKSPACE/toolchain
tar xf ./gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
+ - name: Download toolchain (GPU, gcc 10.3)
+ if: steps.cache-toolchain-gpu.outputs.cache-hit != 'true' && matrix.gpu == 'ON'
+ shell: bash
+ run: |
+ wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz
+
+ mkdir $GITHUB_WORKSPACE/toolchain
+ tar xf ./gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
+
- name: Set environment variable
if: steps.cache-build-result.outputs.cache-hit != 'true'
shell: bash
@@ -103,19 +128,31 @@ jobs:
echo "$GITHUB_WORKSPACE/bin" >> "$GITHUB_PATH"
ls -lh "$GITHUB_WORKSPACE/toolchain/bin"
- echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
- echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV"
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
+ echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV"
+ else
+ echo "CC=aarch64-none-linux-gnu-gcc" >> "$GITHUB_ENV"
+ echo "CXX=aarch64-none-linux-gnu-g++" >> "$GITHUB_ENV"
+ fi
- name: Display toolchain info
shell: bash
run: |
- aarch64-linux-gnu-gcc --version
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ which aarch64-linux-gnu-gcc
+ aarch64-linux-gnu-gcc --version
+ else
+ which aarch64-none-linux-gnu-gcc
+ aarch64-none-linux-gnu-gcc --version
+ fi
- name: Display qemu-aarch64 -h
shell: bash
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc
qemu-aarch64 -h
- name: build aarch64-linux-gnu
@@ -127,6 +164,8 @@ jobs:
cmake --version
export BUILD_SHARED_LIBS=ON
+ export SHERPA_ONNX_ENABLE_GPU=${{ matrix.gpu }}
+ export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=${{ matrix.onnxruntime_version }}
./build-aarch64-linux-gnu.sh
@@ -140,7 +179,11 @@ jobs:
run: |
export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
- export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ else
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc
+ fi
ls -lh ./build-aarch64-linux-gnu/bin
@@ -151,11 +194,20 @@ jobs:
- name: Copy files
shell: bash
run: |
- aarch64-linux-gnu-strip --version
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ aarch64-linux-gnu-strip --version
+ else
+ aarch64-none-linux-gnu-strip --version
+ fi
SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-shared
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ dst=${dst}-cpu
+ else
+ dst=${dst}-gpu-onnxruntime-${{ matrix.onnxruntime_version }}
+ fi
mkdir $dst
cp -a build-aarch64-linux-gnu/install/bin $dst/
@@ -166,7 +218,11 @@ jobs:
ls -lh $dst/bin/
echo "strip"
- aarch64-linux-gnu-strip $dst/bin/*
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ aarch64-linux-gnu-strip $dst/bin/*
+ else
+ aarch64-none-linux-gnu-strip $dst/bin/*
+ fi
tree $dst
@@ -174,8 +230,8 @@ jobs:
- uses: actions/upload-artifact@v4
with:
- name: sherpa-onnx-linux-aarch64-shared
- path: sherpa-onnx-*linux-aarch64-shared.tar.bz2
+ name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }}-onnxruntime-${{ matrix.onnxruntime_version }}
+ path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
@@ -193,12 +249,12 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p aarch64
- cp -v ../sherpa-onnx-*-shared.tar.bz2 ./aarch64
+ cp -v ../sherpa-onnx-*-shared*.tar.bz2 ./aarch64
git status
git lfs track "*.bz2"
diff --git a/.github/workflows/aarch64-linux-gnu-static.yaml b/.github/workflows/aarch64-linux-gnu-static.yaml
index 765e2422f3..66ce6ec244 100644
--- a/.github/workflows/aarch64-linux-gnu-static.yaml
+++ b/.github/workflows/aarch64-linux-gnu-static.yaml
@@ -9,7 +9,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/aarch64-linux-gnu-static.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/aarch64-linux-gnu-static.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -61,7 +59,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+ sudo apt-get install build-essential zlib1g-dev pkg-config libglib2.0-dev binutils-dev libboost-all-dev autoconf libtool libssl-dev libpixman-1-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -184,7 +182,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p aarch64
diff --git a/.github/workflows/add-new-asr-models.yaml b/.github/workflows/add-new-asr-models.yaml
new file mode 100644
index 0000000000..6bd2230f15
--- /dev/null
+++ b/.github/workflows/add-new-asr-models.yaml
@@ -0,0 +1,61 @@
+name: add-new-asr-models
+
+on:
+ # push:
+ # branches:
+ # - new-asr-models
+ workflow_dispatch:
+
+concurrency:
+ group: add-new-asr-models-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ add-new-asr-models:
+ runs-on: ${{ matrix.os }}
+ name: New asr models
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Download icefall-asr-zipformer-multi-zh-en-2023-11-22
+ shell: bash
+ run: |
+ d=sherpa-onnx-zipformer-zh-en-2023-11-22
+ mkdir $d
+ pushd $d
+
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/tokens.txt
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/bbpe.model
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/decoder-epoch-34-avg-19.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.int8.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.int8.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.onnx
+
+ mkdir test_wavs
+ cd test_wavs
+ wget -O 0.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav
+ wget -O 1.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav
+
+ wget -O 2.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
+ popd
+ tar cvjf $d.tar.bz2 $d
+ ls -lh $d
+ rm -rf $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
diff --git a/.github/workflows/android-static.yaml b/.github/workflows/android-static.yaml
new file mode 100644
index 0000000000..7dad8128be
--- /dev/null
+++ b/.github/workflows/android-static.yaml
@@ -0,0 +1,296 @@
+# "static" here means that onnxruntime is linked statically,
+# but libsherpa-onnx-jni.so is still built as a shared library
+name: android-static
+
+on:
+ push:
+ branches:
+ - master
+ - android-link-onnxruntime-statically
+ paths:
+ - '.github/workflows/android-static.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/jni/*'
+ - 'build-android*.sh'
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+ pull_request:
+ branches:
+ - master
+ paths:
+ - '.github/workflows/android-static.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/jni/*'
+ - 'build-android*.sh'
+
+ workflow_dispatch:
+
+concurrency:
+ group: android-static-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build-android-static-libs:
+ name: Android static libs
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android-jni-static
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: build android arm64-v8a
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-arm64-v8a.sh
+ mkdir -p jniLibs/arm64-v8a/
+ cp -v ./build-android-arm64-v8a-static/install/lib/*.so ./jniLibs/arm64-v8a/
+ rm -rf ./build-android-arm64-v8a-static/
+
+ - name: build android armv7-eabi
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-armv7-eabi.sh
+ mkdir -p ./jniLibs/armeabi-v7a/
+ cp -v ./build-android-armv7-eabi-static/install/lib/*.so ./jniLibs/armeabi-v7a/
+ rm -rf ./build-android-armv7-eabi-static
+
+ - name: build android x86_64
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-x86-64.sh
+ mkdir -p ./jniLibs/x86_64
+ cp -v ./build-android-x86-64-static/install/lib/*.so ./jniLibs/x86_64
+ rm -rf ./build-android-x86-64-static
+
+ - name: build android x86
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-x86.sh
+ mkdir -p ./jniLibs/x86
+ cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
+ rm -rf ./build-android-x86
+
+ - name: Copy files
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android-static-link-onnxruntime.tar.bz2
+
+ tar cjvf $filename ./jniLibs
+
+ ls -lh
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs-static
+ path: ./jniLibs
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+
+ cp -v ../sherpa-onnx-*-android*.tar.bz2 ./
+
+ git status
+ git lfs track "*.bz2"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android libs
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*-android*.tar.bz2
+
+ build-android-aar-static:
+ needs: [build-android-static-libs]
+ name: Android AAR
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Retrieve artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs-static
+ path: /tmp/jniLibs
+
+ - name: Show jni libs
+ shell: bash
+ run: |
+ ls -lh /tmp/jniLibs
+
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
+ #
+ - name: Copy libs
+ shell: bash
+ run: |
+ for arch in arm64-v8a armeabi-v7a x86 x86_64; do
+ cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
+ done
+
+ - name: Check libs
+ shell: bash
+ run: |
+ ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*
+
+ - name: Build aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ./gradlew :sherpa_onnx:assembleRelease
+
+ - name: Display aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+ cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../
+
+ - name: Rename aar
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx-release.aar sherpa-onnx-static-link-onnxruntime-${SHERPA_ONNX_VERSION}.aar
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-aar-static
+ path: ./*.aar
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ dst=android/aar
+ mkdir -p $dst
+
+ cp -v ../*.aar $dst
+
+ git status
+ git lfs track "*.aar"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android aar
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.aar
diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml
index 35dfd6b26f..b7da9b8a60 100644
--- a/.github/workflows/android.yaml
+++ b/.github/workflows/android.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/android.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/jni/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/android.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/jni/*'
@@ -32,7 +30,7 @@ concurrency:
jobs:
build-android-libs:
- name: Android for ${{ matrix.os }}
+ name: Android libs
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -44,6 +42,11 @@ jobs:
with:
fetch-depth: 0
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android-jni
+
- name: Display NDK HOME
shell: bash
run: |
@@ -53,37 +56,57 @@ jobs:
- name: build android arm64-v8a
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-arm64-v8a.sh
mkdir -p jniLibs/arm64-v8a/
cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/
+ cp -v ./build-android-arm64-v8a/install/lib/README.md ./jniLibs/arm64-v8a/
rm -rf ./build-android-arm64-v8a/
- name: build android armv7-eabi
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-armv7-eabi.sh
mkdir -p ./jniLibs/armeabi-v7a/
cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/
+ cp -v ./build-android-armv7-eabi/install/lib/README.md ./jniLibs/armeabi-v7a/
rm -rf ./build-android-armv7-eabi
- name: build android x86_64
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-x86-64.sh
mkdir -p ./jniLibs/x86_64
cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64
+ cp -v ./build-android-x86-64/install/lib/README.md ./jniLibs/x86_64
rm -rf ./build-android-x86-64
- name: build android x86
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-x86.sh
mkdir -p ./jniLibs/x86
cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
+ cp -v ./build-android-x86/install/lib/README.md ./jniLibs/x86
rm -rf ./build-android-x86
- name: Copy files
@@ -121,7 +144,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
@@ -143,3 +166,129 @@ jobs:
file_glob: true
overwrite: true
file: sherpa-onnx-*-android.tar.bz2
+
+ build-android-aar:
+ needs: [build-android-libs]
+ name: Android AAR
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Retrieve artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs
+ path: /tmp/jniLibs
+
+ - name: Show jni libs
+ shell: bash
+ run: |
+ ls -lh /tmp/jniLibs
+
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
+ #
+ - name: Copy libs
+ shell: bash
+ run: |
+ for arch in arm64-v8a armeabi-v7a x86 x86_64; do
+ cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
+ done
+
+ - name: Check libs
+ shell: bash
+ run: |
+ ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*
+
+ - name: Build aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ./gradlew :sherpa_onnx:assembleRelease
+
+ - name: Display aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+ cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../
+
+
+ - name: Rename aar
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx-release.aar sherpa-onnx-${SHERPA_ONNX_VERSION}.aar
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-aar
+ path: ./*.aar
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ dst=android/aar
+ mkdir -p $dst
+
+ cp -v ../*.aar $dst
+
+ git status
+ git lfs track "*.aar"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android aar
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.aar
diff --git a/.github/workflows/apk-asr-2pass.yaml b/.github/workflows/apk-asr-2pass.yaml
index bbe61060a8..72885db45e 100644
--- a/.github/workflows/apk-asr-2pass.yaml
+++ b/.github/workflows/apk-asr-2pass.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["2"]
- index: ["0", "1"]
+ total: ["4"]
+ index: ["0", "1", "2", "3"]
steps:
- uses: actions/checkout@v4
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-asr.yaml b/.github/workflows/apk-asr.yaml
index fc1cd1f5d8..e49b179c8b 100644
--- a/.github/workflows/apk-asr.yaml
+++ b/.github/workflows/apk-asr.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["3"]
- index: ["0", "1", "2"]
+ total: ["6"]
+ index: ["0", "1", "2", "3", "4", "5"]
steps:
- uses: actions/checkout@v4
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-audio-tagging-wearos.yaml b/.github/workflows/apk-audio-tagging-wearos.yaml
index 0ed8230769..bfe9f9ac7c 100644
--- a/.github/workflows/apk-audio-tagging-wearos.yaml
+++ b/.github/workflows/apk-audio-tagging-wearos.yaml
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-audio-tagging.yaml b/.github/workflows/apk-audio-tagging.yaml
index f6b85c3b2f..c11180c4ae 100644
--- a/.github/workflows/apk-audio-tagging.yaml
+++ b/.github/workflows/apk-audio-tagging.yaml
@@ -160,7 +160,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-kws.yaml b/.github/workflows/apk-kws.yaml
index 524622de85..43cdef49e0 100644
--- a/.github/workflows/apk-kws.yaml
+++ b/.github/workflows/apk-kws.yaml
@@ -160,7 +160,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-speaker-diarization.yaml b/.github/workflows/apk-speaker-diarization.yaml
new file mode 100644
index 0000000000..90bcc7323a
--- /dev/null
+++ b/.github/workflows/apk-speaker-diarization.yaml
@@ -0,0 +1,179 @@
+name: apk-speaker-diarization
+
+on:
+ push:
+ branches:
+ - apk
+
+ workflow_dispatch:
+
+concurrency:
+ group: apk-speaker-diarization-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: write
+
+jobs:
+ apk_speaker_diarization:
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+ runs-on: ${{ matrix.os }}
+ name: apk for speaker diarization ${{ matrix.index }}/${{ matrix.total }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ total: ["1"]
+ index: ["0"]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ python3 -m pip install --upgrade pip jinja2
+
+ - name: Setup build tool version variable
+ shell: bash
+ run: |
+ echo "---"
+ ls -lh /usr/local/lib/android/
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk/build-tools
+ echo "---"
+
+ BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
+ echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
+ echo "Last build tool version is: $BUILD_TOOL_VERSION"
+
+ - name: Generate build script
+ shell: bash
+ run: |
+ cd scripts/apk
+
+ total=${{ matrix.total }}
+ index=${{ matrix.index }}
+
+ python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index
+
+ chmod +x build-apk-speaker-diarization.sh
+ mv -v ./build-apk-speaker-diarization.sh ../..
+
+ - name: build APK
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-apk-speaker-diarization.sh
+
+ - name: Display APK
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ # https://github.com/marketplace/actions/sign-android-release
+ - uses: r0adkll/sign-android-release@v1
+ name: Sign app APK
+ with:
+ releaseDirectory: ./apks
+ signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
+ alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
+ keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
+ env:
+ BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
+
+ - name: Display APK after signing
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Rename APK after signing
+ shell: bash
+ run: |
+ cd apks
+ rm -fv signingKey.jks
+ rm -fv *.apk.idsig
+ rm -fv *-aligned.apk
+
+ all_apks=$(ls -1 *-signed.apk)
+ echo "----"
+ echo $all_apks
+ echo "----"
+ for apk in ${all_apks[@]}; do
+ n=$(echo $apk | sed -e s/-signed//)
+ mv -v $apk $n
+ done
+
+ cd ..
+
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Display APK after rename
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=speaker-diarization/$SHERPA_ONNX_VERSION
+ mkdir -p $d/
+ cp -v ../apks/*.apk $d/
+ git status
+ git lfs track "*.apk"
+ git add .
+ git commit -m "add more apks"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
diff --git a/.github/workflows/apk-speaker-identification.yaml b/.github/workflows/apk-speaker-identification.yaml
index ca89ec49f6..c88718d6e0 100644
--- a/.github/workflows/apk-speaker-identification.yaml
+++ b/.github/workflows/apk-speaker-identification.yaml
@@ -53,6 +53,23 @@ jobs:
run: |
python3 -m pip install --upgrade pip jinja2
+ - name: Setup build tool version variable
+ shell: bash
+ run: |
+ echo "---"
+ ls -lh /usr/local/lib/android/
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk/build-tools
+ echo "---"
+
+ BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
+ echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
+ echo "Last build tool version is: $BUILD_TOOL_VERSION"
+
- name: Generate build script
shell: bash
run: |
@@ -82,6 +99,51 @@ jobs:
ls -lh ./apks/
du -h -d1 .
+ # https://github.com/marketplace/actions/sign-android-release
+ - uses: r0adkll/sign-android-release@v1
+ name: Sign app APK
+ with:
+ releaseDirectory: ./apks
+ signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
+ alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
+ keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
+ env:
+ BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
+
+ - name: Display APK after signing
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Rename APK after signing
+ shell: bash
+ run: |
+ cd apks
+ rm -fv signingKey.jks
+ rm -fv *.apk.idsig
+ rm -fv *-aligned.apk
+
+ all_apks=$(ls -1 *-signed.apk)
+ echo "----"
+ echo $all_apks
+ echo "----"
+ for apk in ${all_apks[@]}; do
+ n=$(echo $apk | sed -e s/-signed//)
+ mv -v $apk $n
+ done
+
+ cd ..
+
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Display APK after rename
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -101,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-spoken-language-identification.yaml b/.github/workflows/apk-spoken-language-identification.yaml
index 3cb9c83b28..cc7525cd42 100644
--- a/.github/workflows/apk-spoken-language-identification.yaml
+++ b/.github/workflows/apk-spoken-language-identification.yaml
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-tts-engine.yaml b/.github/workflows/apk-tts-engine.yaml
index d251483e4a..b8614cb76c 100644
--- a/.github/workflows/apk-tts-engine.yaml
+++ b/.github/workflows/apk-tts-engine.yaml
@@ -164,7 +164,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-tts.yaml b/.github/workflows/apk-tts.yaml
index dd0aa3f775..1609739c69 100644
--- a/.github/workflows/apk-tts.yaml
+++ b/.github/workflows/apk-tts.yaml
@@ -164,7 +164,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-vad-asr.yaml b/.github/workflows/apk-vad-asr.yaml
index 8310043a9c..fe706aa14d 100644
--- a/.github/workflows/apk-vad-asr.yaml
+++ b/.github/workflows/apk-vad-asr.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["5"]
- index: ["0", "1", "2", "3", "4"]
+ total: ["10"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
steps:
- uses: actions/checkout@v4
@@ -163,8 +163,9 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
+ du -h -d1 .
git fetch
git pull
git merge -m "merge remote" --ff origin main
diff --git a/.github/workflows/apk-vad.yaml b/.github/workflows/apk-vad.yaml
index 8253145b68..f1a4364fc0 100644
--- a/.github/workflows/apk-vad.yaml
+++ b/.github/workflows/apk-vad.yaml
@@ -160,13 +160,13 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
git merge -m "merge remote" --ff origin main
- d=vad/SHERPA_ONNX_VERSION
+ d=vad/$SHERPA_ONNX_VERSION
mkdir -p $d
cp -v ../apks/*.apk $d/
git status
diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml
index a56b2cdad4..63a5cf414a 100644
--- a/.github/workflows/arm-linux-gnueabihf.yaml
+++ b/.github/workflows/arm-linux-gnueabihf.yaml
@@ -7,7 +7,6 @@ on:
- master
paths:
- '.github/workflows/arm-linux-gnueabihf.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/arm-linux-gnueabihf.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -62,7 +60,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+ sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -205,7 +203,7 @@ jobs:
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p arm32
diff --git a/.github/workflows/build-wheels-aarch64-cuda.yaml b/.github/workflows/build-wheels-aarch64-cuda.yaml
new file mode 100644
index 0000000000..a221553a4a
--- /dev/null
+++ b/.github/workflows/build-wheels-aarch64-cuda.yaml
@@ -0,0 +1,118 @@
+name: build-wheels-aarch64-cuda
+
+on:
+ push:
+ branches:
+ - wheel
+ workflow_dispatch:
+
+env:
+ SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1
+
+concurrency:
+ group: build-wheels-aarch64-cuda-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build_wheels_aarch64_cuda:
+ name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
+ manylinux: [manylinux2014] #, manylinux_2_28]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+
+ # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
+ # for a list of versions
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.21.3
+ env:
+ CIBW_BEFORE_ALL: |
+ git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
+ cd alsa-lib
+ ./gitcompile
+ cd ..
+ echo "PWD: $PWD"
+ ls -lh /project/alsa-lib/src/.libs
+
+ CIBW_ENVIRONMENT: CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR SHERPA_ONNX_MAKE_ARGS="VERBOSE=1" SHERPA_ONNX_ENABLE_ALSA=1 SHERPA_ONNX_ENABLE_GPU=ON
+ CIBW_BUILD: "${{ matrix.python-version}}-* "
+ CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
+ CIBW_BUILD_VERBOSITY: 3
+ CIBW_ARCHS_LINUX: aarch64
+ CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_aarch64
+ # Note: onnxruntime >= 1.17.0 drops support for CentOS 7.0 and supports only manylinux_2_28;
+ # manylinux_2_24 is no longer supported.
+
+ - name: Display wheels
+ shell: bash
+ run: |
+ ls -lh ./wheelhouse/
+
+ - name: Install patchelf
+ shell: bash
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y patchelf
+ patchelf --help
+
+ - name: Patch wheels
+ shell: bash
+ run: |
+ mkdir ./wheels
+ sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels
+
+ ls -lh ./wheels/
+ rm -rf ./wheelhouse
+ mv ./wheels ./wheelhouse
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ d=cuda/$SHERPA_ONNX_VERSION
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ mkdir -p $d
+
+ cp -v ../wheelhouse/*.whl $d/
+
+ git status
+ git add .
+ git commit -m "add more wheels"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}
+ path: ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml
index 9d4ac571e5..1ba8ebd682 100644
--- a/.github/workflows/build-wheels-aarch64.yaml
+++ b/.github/workflows/build-wheels-aarch64.yaml
@@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
steps:
@@ -35,7 +35,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BEFORE_ALL: |
git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
@@ -60,7 +60,6 @@ jobs:
ls -lh ./wheelhouse/
- name: Install patchelf
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
sudo apt-get update -q
@@ -69,7 +68,6 @@ jobs:
- name: Patch wheels
shell: bash
- if: matrix.os == 'ubuntu-latest'
run: |
mkdir ./wheels
sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels
@@ -99,7 +97,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -125,6 +123,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-armv7l.yaml b/.github/workflows/build-wheels-armv7l.yaml
index 05c3b196dc..58a7cc8973 100644
--- a/.github/workflows/build-wheels-armv7l.yaml
+++ b/.github/workflows/build-wheels-armv7l.yaml
@@ -102,7 +102,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -129,6 +129,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-linux-cuda.yaml b/.github/workflows/build-wheels-linux-cuda.yaml
index b1ee898250..1801840abc 100644
--- a/.github/workflows/build-wheels-linux-cuda.yaml
+++ b/.github/workflows/build-wheels-linux-cuda.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-20.04]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -34,7 +34,7 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
- pip install -U pip wheel setuptools twine
+ pip install -U pip wheel setuptools twine==5.0.0
- name: Build alsa-lib
shell: bash
@@ -113,7 +113,7 @@ jobs:
d=cuda/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml
index e16f5bb9a7..0380e2a993 100644
--- a/.github/workflows/build-wheels-linux.yaml
+++ b/.github/workflows/build-wheels-linux.yaml
@@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
@@ -31,7 +31,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BEFORE_ALL: |
git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
@@ -96,7 +96,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -118,7 +118,7 @@ jobs:
shell: bash
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-arm64.yaml b/.github/workflows/build-wheels-macos-arm64.yaml
index ce899c5d19..fe1d316281 100644
--- a/.github/workflows/build-wheels-macos-arm64.yaml
+++ b/.github/workflows/build-wheels-macos-arm64.yaml
@@ -21,13 +21,13 @@ jobs:
fail-fast: false
matrix:
os: [macos-13]
- python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
- name: Build wheels
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'"
@@ -68,7 +68,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -95,6 +95,6 @@ jobs:
fi
python3 -m pip install $opts --upgrade pip
- python3 -m pip install $opts wheel twine setuptools
+ python3 -m pip install $opts wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-universal2.yaml b/.github/workflows/build-wheels-macos-universal2.yaml
index 4578d370e9..0f9dcedc78 100644
--- a/.github/workflows/build-wheels-macos-universal2.yaml
+++ b/.github/workflows/build-wheels-macos-universal2.yaml
@@ -21,13 +21,13 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
- python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
- name: Build wheels
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'"
@@ -68,7 +68,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -89,6 +89,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --break-system-packages --upgrade pip
- python3 -m pip install --break-system-packages wheel twine setuptools
+ python3 -m pip install --break-system-packages wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-x64.yaml b/.github/workflows/build-wheels-macos-x64.yaml
index b7bf6ff54b..cbb4792e93 100644
--- a/.github/workflows/build-wheels-macos-x64.yaml
+++ b/.github/workflows/build-wheels-macos-x64.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [macos-13]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
@@ -42,7 +42,7 @@ jobs:
- name: Build wheels
if: matrix.python-version != 'cp37'
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64'"
@@ -83,7 +83,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -110,6 +110,6 @@ jobs:
fi
python3 -m pip install $opts --upgrade pip
- python3 -m pip install $opts wheel twine setuptools
+ python3 -m pip install $opts wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-win32.yaml b/.github/workflows/build-wheels-win32.yaml
index 2560847830..732a17d7b5 100644
--- a/.github/workflows/build-wheels-win32.yaml
+++ b/.github/workflows/build-wheels-win32.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
@@ -29,7 +29,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-A Win32"
CIBW_BUILD: "${{ matrix.python-version}}-* "
@@ -67,7 +67,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -88,6 +88,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-win64-cuda.yaml b/.github/workflows/build-wheels-win64-cuda.yaml
index f0a17da8cc..27b4fb87eb 100644
--- a/.github/workflows/build-wheels-win64-cuda.yaml
+++ b/.github/workflows/build-wheels-win64-cuda.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-2019]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -75,7 +75,7 @@ jobs:
d=cuda/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/build-wheels-win64.yaml b/.github/workflows/build-wheels-win64.yaml
index 14e3e2ac4d..f2cc7c157a 100644
--- a/.github/workflows/build-wheels-win64.yaml
+++ b/.github/workflows/build-wheels-win64.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-2019]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -73,7 +73,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -94,6 +94,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml
index 2afd95cab9..8fcfafd43d 100644
--- a/.github/workflows/build-xcframework.yaml
+++ b/.github/workflows/build-xcframework.yaml
@@ -43,6 +43,13 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Build iOS shared
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ export CMAKE_VERBOSE_MAKEFILE=ON
+ ./build-ios-shared.sh
+
- name: Build iOS
if: matrix.with_tts == 'ON'
shell: bash
@@ -135,7 +142,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
diff --git a/.github/workflows/c-api-from-buffer.yaml b/.github/workflows/c-api-from-buffer.yaml
index 4352cd7ce9..5d9bc11db0 100644
--- a/.github/workflows/c-api-from-buffer.yaml
+++ b/.github/workflows/c-api-from-buffer.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/c-api-from-buffer.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/c-api-from-buffer.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -215,4 +213,4 @@ jobs:
./keywords-spotter-buffered-tokens-keywords-c-api
- rm -rf sherpa-onnx-kws-zipformer-*
\ No newline at end of file
+ rm -rf sherpa-onnx-kws-zipformer-*
diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml
index 589bda71f1..4a4108c989 100644
--- a/.github/workflows/c-api.yaml
+++ b/.github/workflows/c-api.yaml
@@ -4,11 +4,8 @@ on:
push:
branches:
- master
- tags:
- - 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/c-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/c-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -83,6 +79,201 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
fi
+ - name: Test kws (zh)
+ shell: bash
+ run: |
+ gcc -o kws-c-api ./c-api-examples/kws-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kws-c-api
+
+ rm ./kws-c-api
+ rm -rf sherpa-onnx-kws-*
+
+ - name: Test Kokoro TTS (zh+en)
+ shell: bash
+ run: |
+ gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-zh-en-c-api
+
+ rm ./kokoro-tts-zh-en-c-api
+ rm -rf kokoro-multi-lang-*
+
+ - name: Test Kokoro TTS (en)
+ shell: bash
+ run: |
+ gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-en-c-api
+
+ rm ./kokoro-tts-en-c-api
+ rm -rf kokoro-en-*
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: kokoro-tts-${{ matrix.os }}
+ path: ./generated-kokoro-*.wav
+
+ - name: Test Matcha TTS (zh)
+ shell: bash
+ run: |
+ gcc -o matcha-tts-zh-c-api ./c-api-examples/matcha-tts-zh-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-zh-c-api
+
+ rm ./matcha-tts-zh-c-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - name: Test Matcha TTS (en)
+ shell: bash
+ run: |
+ gcc -o matcha-tts-en-c-api ./c-api-examples/matcha-tts-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-en-c-api
+
+ rm ./matcha-tts-en-c-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: matcha-tts-${{ matrix.os }}
+ path: ./generated-matcha-*.wav
+
+ - name: Test vad + Whisper tiny.en
+ shell: bash
+ run: |
+ gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ # Now download models
+ #
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+ rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./vad-whisper-c-api
+
+ rm -rf sherpa-onnx-*
+ rm -rf *.onnx
+ rm *.wav
+
+ - name: Test vad + Moonshine
+ shell: bash
+ run: |
+ gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ # Now download models
+ #
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./vad-moonshine-c-api
+
+ rm -rf sherpa-onnx-*
+ rm -rf *.onnx
+ rm *.wav
+
+ - name: Test Moonshine
+ shell: bash
+ run: |
+ gcc -o moonshine-c-api ./c-api-examples/moonshine-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./moonshine-c-api
+
+ rm -rf sherpa-onnx-*
+
- name: Test ffmpeg
if: matrix.os == 'macos-latest'
shell: bash
diff --git a/.github/workflows/checksum.yaml b/.github/workflows/checksum.yaml
new file mode 100644
index 0000000000..e500209d60
--- /dev/null
+++ b/.github/workflows/checksum.yaml
@@ -0,0 +1,21 @@
+name: Create checksum
+
+on:
+ schedule:
+ - cron: "0 1 * * *" # Runs at 1:00 AM UTC daily
+ workflow_dispatch:
+
+jobs:
+ checksum:
+ if: github.repository_owner == 'k2-fsa'
+ runs-on: macos-latest
+ strategy:
+ matrix:
+ tag: [null, asr-models, tts-models, kws-models, speaker-recongition-models, audio-tagging-models, punctuation-models]
+ steps:
+ - name: Run checksum action
+ uses: thewh1teagle/checksum@v1
+ with:
+ tag: ${{ matrix.tag }}
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/cxx-api.yaml b/.github/workflows/cxx-api.yaml
new file mode 100644
index 0000000000..e5a99fb09b
--- /dev/null
+++ b/.github/workflows/cxx-api.yaml
@@ -0,0 +1,332 @@
+name: cxx-api
+
+on:
+ push:
+ branches:
+ - master
+ - cxx-api-asr-non-streaming
+ paths:
+ - '.github/workflows/cxx-api.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/c-api/*'
+ - 'cxx-api-examples/**'
+ pull_request:
+ branches:
+ - master
+ paths:
+ - '.github/workflows/cxx-api.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/c-api/*'
+ - 'cxx-api-examples/**'
+
+ workflow_dispatch:
+
+concurrency:
+ group: cxx-api-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ cxx_api:
+ name: ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-cxx-api-shared
+
+ - name: Build sherpa-onnx
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ mkdir build
+ cd build
+
+ cmake \
+ -D CMAKE_BUILD_TYPE=Release \
+ -D BUILD_SHARED_LIBS=ON \
+ -D CMAKE_INSTALL_PREFIX=./install \
+ -D SHERPA_ONNX_ENABLE_BINARY=OFF \
+ ..
+
+ make -j2 install
+
+ ls -lh install/lib
+ ls -lh install/include
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./install/lib/libsherpa-onnx-c-api.so
+ ldd ./install/lib/libsherpa-onnx-cxx-api.so
+ echo "---"
+ readelf -d ./install/lib/libsherpa-onnx-c-api.so
+ readelf -d ./install/lib/libsherpa-onnx-cxx-api.so
+ fi
+
+ if [[ ${{ matrix.os }} == macos-latest ]]; then
+ otool -L ./install/lib/libsherpa-onnx-c-api.dylib
+ otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
+ fi
+
+ - name: Test KWS (zh)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kws-cxx-api ./cxx-api-examples/kws-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kws-cxx-api
+
+ rm kws-cxx-api
+ rm -rf sherpa-onnx-kws-*
+
+ - name: Test Kokoro TTS (zh+en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kokoro-tts-zh-en-cxx-api ./cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-zh-en-cxx-api
+
+ rm kokoro-tts-zh-en-cxx-api
+ rm -rf kokoro-*
+
+ - name: Test Kokoro TTS (en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-en-cxx-api
+
+ rm kokoro-tts-en-cxx-api
+ rm -rf kokoro-en-*
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: kokoro-tts-${{ matrix.os }}
+ path: ./generated-kokoro-*.wav
+
+ - name: Test Matcha TTS (zh)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o matcha-tts-zh-cxx-api ./cxx-api-examples/matcha-tts-zh-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-zh-cxx-api
+
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+ rm matcha-tts-zh-cxx-api
+
+ - name: Test Matcha TTS (en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o matcha-tts-en-cxx-api ./cxx-api-examples/matcha-tts-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-en-cxx-api
+
+ rm matcha-tts-en-cxx-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: matcha-tts-${{ matrix.os }}
+ path: ./generated-matcha-*.wav
+
+ - name: Test Moonshine tiny
+ shell: bash
+ run: |
+ g++ -std=c++17 -o moonshine-cxx-api ./cxx-api-examples/moonshine-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./moonshine-cxx-api
+
+ rm -rf sherpa-onnx-*
+ rm ./moonshine-cxx-api
+
+ - name: Test whisper
+ shell: bash
+ run: |
+ g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh whisper-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./whisper-cxx-api
+ echo "----"
+ readelf -d ./whisper-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+ rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+ ls -lh sherpa-onnx-whisper-tiny.en
+ echo "---"
+ ls -lh sherpa-onnx-whisper-tiny.en/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./whisper-cxx-api
+
+ rm -rf sherpa-onnx-whisper-*
+ rm ./whisper-cxx-api
+
+ - name: Test SenseVoice
+ shell: bash
+ run: |
+ g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh sense-voice-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./sense-voice-cxx-api
+ echo "----"
+ readelf -d ./sense-voice-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+ rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+ ls -lh sherpa-onnx-sense-voice-*
+ echo "---"
+ ls -lh sherpa-onnx-sense-voice-*/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./sense-voice-cxx-api
+
+ rm -rf sherpa-onnx-sense-voice-*
+ rm ./sense-voice-cxx-api
+
+ - name: Test streaming zipformer
+ shell: bash
+ run: |
+ g++ -std=c++17 -o streaming-zipformer-cxx-api ./cxx-api-examples/streaming-zipformer-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh streaming-zipformer-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./streaming-zipformer-cxx-api
+ echo "----"
+ readelf -d ./streaming-zipformer-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+ ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+ echo "---"
+ ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./streaming-zipformer-cxx-api
+
+ rm -rf sherpa-onnx-streaming-zipformer-*
+ rm ./streaming-zipformer-cxx-api
diff --git a/.github/workflows/dot-net.yaml b/.github/workflows/dot-net.yaml
index 36637a9e2c..899cb99956 100644
--- a/.github/workflows/dot-net.yaml
+++ b/.github/workflows/dot-net.yaml
@@ -90,7 +90,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
export GIT_LFS_SKIP_SMUDGE=1
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
git fetch
@@ -125,9 +125,7 @@ jobs:
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
- dotnet-version: |
- 6.0.x
- 7.0.x
+ dotnet-version: 8.0.x
- name: Install Python dependencies
shell: bash
diff --git a/.github/workflows/export-3dspeaker-to-onnx.yaml b/.github/workflows/export-3dspeaker-to-onnx.yaml
index 42c965c909..e62d42784b 100644
--- a/.github/workflows/export-3dspeaker-to-onnx.yaml
+++ b/.github/workflows/export-3dspeaker-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [macos-latest]
+ os: [ubuntu-latest]
python-version: ["3.8"]
steps:
@@ -43,3 +43,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-ced-to-onnx.yaml b/.github/workflows/export-ced-to-onnx.yaml
index 70c4cc5fb5..2f714bb80b 100644
--- a/.github/workflows/export-ced-to-onnx.yaml
+++ b/.github/workflows/export-ced-to-onnx.yaml
@@ -66,7 +66,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/k2-fsa/$d huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d huggingface
mv -v $d/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-kokoro.yaml b/.github/workflows/export-kokoro.yaml
new file mode 100644
index 0000000000..e6aae1da62
--- /dev/null
+++ b/.github/workflows/export-kokoro.yaml
@@ -0,0 +1,226 @@
+name: export-kokoro-to-onnx
+
+on:
+ push:
+ branches:
+ - export-kokoro
+
+ workflow_dispatch:
+
+concurrency:
+ group: export-kokoro-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-kokoro-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export kokoro ${{ matrix.version }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ version: ["0.19", "1.0"]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch
+
+ - name: Run
+ shell: bash
+ run: |
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
+ tar xf espeak-ng-data.tar.bz2
+ rm espeak-ng-data.tar.bz2
+ cd scripts/kokoro
+ v=${{ matrix.version }}
+ if [[ $v = "0.19" ]]; then
+ ./run.sh
+ elif [[ $v == "1.0" ]]; then
+ cd v1.0
+ ./run.sh
+ fi
+
+ - name: Collect results ${{ matrix.version }}
+ if: matrix.version == '0.19'
+ shell: bash
+ run: |
+ src=scripts/kokoro
+
+ d=kokoro-en-v0_19
+ mkdir $d
+ cp -a LICENSE $d/LICENSE
+ cp -a espeak-ng-data $d/
+ cp -v $src/kokoro-v0_19.onnx $d/model.onnx
+ cp -v $src/voices.bin $d/
+ cp -v $src/tokens.txt $d/
+ cp -v $src/README-new.md $d/README.md
+ ls -lh $d/
+ tar cjfv $d.tar.bz2 $d
+ rm -rf $d
+
+ ls -lh $d.tar.bz2
+
+ - name: Collect results ${{ matrix.version }}
+ if: matrix.version == '1.0'
+ shell: bash
+ run: |
+ curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
+ tar xvf dict.tar.bz2
+ rm dict.tar.bz2
+
+ curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
+ curl -SL -o number-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
+ curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
+
+ src=scripts/kokoro/v1.0
+
+ d=kokoro-multi-lang-v1_0
+ mkdir $d
+ cp -a LICENSE $d/LICENSE
+ cp -a espeak-ng-data $d/
+ cp -v $src/kokoro.onnx $d/model.onnx
+ cp -v $src/voices.bin $d/
+ cp -v $src/tokens.txt $d/
+ cp -v $src/lexicon*.txt $d/
+ cp -v $src/README.md $d/README.md
+ cp -av dict $d/
+ cp -v ./*.fst $d/
+ ls -lh $d/
+ echo "---"
+ ls -lh $d/dict
+
+ tar cjfv $d.tar.bz2 $d
+ rm -rf $d
+
+ ls -lh $d.tar.bz2
+
+ - name: Publish to huggingface ${{ matrix.version }}
+ if: matrix.version == '0.19'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 huggingface
+ cd huggingface
+ rm -rf ./*
+ git fetch
+ git pull
+
+ git lfs track "cmn_dict"
+ git lfs track "ru_dict"
+ git lfs track "*.wav"
+
+ cp -a ../espeak-ng-data ./
+ mkdir -p test_wavs
+
+ cp -v ../scripts/kokoro/kokoro-v0_19.onnx ./model.onnx
+
+ cp -v ../scripts/kokoro/kokoro-v0_19-*.wav ./test_wavs/
+
+ cp -v ../scripts/kokoro/tokens.txt .
+ cp -v ../scripts/kokoro/voices.bin .
+ cp -v ../scripts/kokoro/README-new.md ./README.md
+ cp -v ../LICENSE ./
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
+
+ - name: Publish to huggingface ${{ matrix.version }}
+ if: matrix.version == '1.0'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 huggingface
+ cd huggingface
+ rm -rf ./*
+ git fetch
+ git pull
+
+ git lfs track "cmn_dict"
+ git lfs track "ru_dict"
+ git lfs track "*.wav"
+ git lfs track "lexicon*.txt"
+
+ cp -a ../espeak-ng-data ./
+
+ cp -v ../scripts/kokoro/v1.0/kokoro.onnx ./model.onnx
+
+
+ cp -v ../scripts/kokoro/v1.0/tokens.txt .
+ cp -v ../scripts/kokoro/v1.0/voices.bin .
+ cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
+ cp -v ../scripts/kokoro/v1.0/README.md ./README.md
+ cp -v ../LICENSE ./
+ cp -av ../dict ./
+ cp -v ../*.fst ./
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
+
+ - name: Release
+ if: github.repository_owner == 'csukuangfj'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: tts-models
+
+ - name: Release
+ if: github.repository_owner == 'k2-fsa'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ tag: tts-models
diff --git a/.github/workflows/export-libriheavy.yaml b/.github/workflows/export-libriheavy.yaml
index cfe0a28d20..69c22ef243 100644
--- a/.github/workflows/export-libriheavy.yaml
+++ b/.github/workflows/export-libriheavy.yaml
@@ -56,7 +56,7 @@ jobs:
src=sherpa-onnx-zipformer-en-libriheavy-20230926-$m
echo "Process $src"
- git clone https://huggingface.co/csukuangfj/$src huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
cd huggingface
git fetch
git pull
@@ -100,7 +100,7 @@ jobs:
src=sherpa-onnx-zipformer-en-libriheavy-20230830-$m-punct-case
echo "Process $src"
- git clone https://huggingface.co/csukuangfj/$src huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-melo-tts-to-onnx.yaml b/.github/workflows/export-melo-tts-to-onnx.yaml
index 0dc9bfe9d7..d0715b95a3 100644
--- a/.github/workflows/export-melo-tts-to-onnx.yaml
+++ b/.github/workflows/export-melo-tts-to-onnx.yaml
@@ -40,7 +40,7 @@ jobs:
name: test.wav
path: scripts/melo-tts/test.wav
- - name: Publish to huggingface
+ - name: Publish to huggingface (Chinese + English)
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
@@ -56,19 +56,19 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface
cd huggingface
git fetch
git pull
echo "pwd: $PWD"
- ls -lh ../scripts/melo-tts
+ ls -lh ../scripts/melo-tts/zh_en
rm -rf ./
- cp -v ../scripts/melo-tts/*.onnx .
- cp -v ../scripts/melo-tts/lexicon.txt .
- cp -v ../scripts/melo-tts/tokens.txt .
- cp -v ../scripts/melo-tts/README.md .
+ cp -v ../scripts/melo-tts/zh_en/*.onnx .
+ cp -v ../scripts/melo-tts/zh_en/lexicon.txt .
+ cp -v ../scripts/melo-tts/zh_en/tokens.txt .
+ cp -v ../scripts/melo-tts/zh_en/README.md .
curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
@@ -102,6 +102,60 @@ jobs:
tar cjvf $dst.tar.bz2 $dst
rm -rf $dst
+ - name: Publish to huggingface (English)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en huggingface
+ cd huggingface
+ git fetch
+ git pull
+ echo "pwd: $PWD"
+ ls -lh ../scripts/melo-tts/en
+
+ rm -rf ./
+
+ cp -v ../scripts/melo-tts/en/*.onnx .
+ cp -v ../scripts/melo-tts/en/lexicon.txt .
+ cp -v ../scripts/melo-tts/en/tokens.txt .
+ cp -v ../scripts/melo-tts/en/README.md .
+
+ curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git diff
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en main || true
+
+ cd ..
+
+ rm -rf huggingface/.git*
+ dst=vits-melo-tts-en
+
+ mv huggingface $dst
+
+ tar cjvf $dst.tar.bz2 $dst
+ rm -rf $dst
+
- name: Release
uses: svenstaro/upload-release-action@v2
with:
diff --git a/.github/workflows/export-moonshine-to-onnx.yaml b/.github/workflows/export-moonshine-to-onnx.yaml
new file mode 100644
index 0000000000..2e73c2e049
--- /dev/null
+++ b/.github/workflows/export-moonshine-to-onnx.yaml
@@ -0,0 +1,106 @@
+name: export-moonshine-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-moonshine-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-moonshine-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export moonshine models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ pip install -q onnx onnxruntime librosa tokenizers soundfile
+
+ - name: Run
+ shell: bash
+ run: |
+ pushd scripts/moonshine
+ ./run.sh
+ popd
+
+ mv -v scripts/moonshine/*.tar.bz2 .
+ mv -v scripts/moonshine/sherpa-onnx-* ./
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
+
+ - name: Publish to huggingface (tiny)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-moonshine-tiny-en-int8
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
+ rm -rf huggingface
+
+ - name: Publish to huggingface (base)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-moonshine-base-en-int8
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
+ rm -rf huggingface
diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
index 138c708ad7..bbabfb60cb 100644
--- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
+++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
@@ -67,7 +67,7 @@ jobs:
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$m huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
index 7a7b7fc4eb..4a7e2339ed 100644
--- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
+++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
@@ -67,7 +67,7 @@ jobs:
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$m huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-nemo-giga-am-to-onnx.yaml b/.github/workflows/export-nemo-giga-am-to-onnx.yaml
new file mode 100644
index 0000000000..1af754d0b4
--- /dev/null
+++ b/.github/workflows/export-nemo-giga-am-to-onnx.yaml
@@ -0,0 +1,116 @@
+name: export-nemo-giga-am-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-nemo-giga-am-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-nemo-giga-am-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export nemo GigaAM models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Run CTC
+ shell: bash
+ run: |
+ pushd scripts/nemo/GigaAM
+ ./run-ctc.sh
+ popd
+
+ d=sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24
+ mkdir $d
+ mkdir $d/test_wavs
+ rm scripts/nemo/GigaAM/model.onnx
+ mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
+ cp -v scripts/nemo/GigaAM/*.md $d/
+ mv -v scripts/nemo/GigaAM/*.pdf $d/
+ mv -v scripts/nemo/GigaAM/tokens.txt $d/
+ mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
+ mv -v scripts/nemo/GigaAM/run-ctc.sh $d/
+ mv -v scripts/nemo/GigaAM/*-ctc.py $d/
+
+ ls -lh scripts/nemo/GigaAM/
+
+ ls -lh $d
+
+ tar cjvf ${d}.tar.bz2 $d
+
+ - name: Run Transducer
+ shell: bash
+ run: |
+ pushd scripts/nemo/GigaAM
+ ./run-rnnt.sh
+ popd
+
+ d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24
+ mkdir $d
+ mkdir $d/test_wavs
+
+ mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
+ mv -v scripts/nemo/GigaAM/decoder.onnx $d/
+ mv -v scripts/nemo/GigaAM/joiner.onnx $d/
+
+ cp -v scripts/nemo/GigaAM/*.md $d/
+ mv -v scripts/nemo/GigaAM/*.pdf $d/
+ mv -v scripts/nemo/GigaAM/tokens.txt $d/
+ mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
+ mv -v scripts/nemo/GigaAM/run-rnnt.sh $d/
+ mv -v scripts/nemo/GigaAM/*-rnnt.py $d/
+
+ ls -lh scripts/nemo/GigaAM/
+
+ ls -lh $d
+
+ tar cjvf ${d}.tar.bz2 $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
+
+ - name: Publish to huggingface (Transducer)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
index 180c3dc12a..5059664130 100644
--- a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
+++ b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
+ os: [macos-latest]
python-version: ["3.10"]
steps:
@@ -43,3 +43,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
index 300aca500c..53f8dac7d4 100644
--- a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
+++ b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install pyannote
shell: bash
run: |
- pip install pyannote.audio onnx onnxruntime
+ pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3
- name: Run
shell: bash
@@ -75,7 +75,7 @@ jobs:
d=sherpa-onnx-pyannote-segmentation-3-0
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$d huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
cp -v $d/* ./huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-revai-segmentation-to-onnx.yaml b/.github/workflows/export-revai-segmentation-to-onnx.yaml
new file mode 100644
index 0000000000..d82f7c4e09
--- /dev/null
+++ b/.github/workflows/export-revai-segmentation-to-onnx.yaml
@@ -0,0 +1,86 @@
+name: export-revai-segmentation-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-revai-segmentation-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-revai-segmentation-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export revai segmentation models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install pyannote
+ shell: bash
+ run: |
+ pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3
+
+ - name: Run
+ shell: bash
+ run: |
+ d=sherpa-onnx-reverb-diarization-v1
+ src=$PWD/$d
+ mkdir -p $src
+
+ pushd scripts/pyannote/segmentation
+ ./run-revai.sh
+ cp ./*.onnx $src/
+ cp ./README.md $src/
+ cp ./LICENSE $src/
+ cp ./run-revai.sh $src/run.sh
+ cp ./*.py $src/
+
+ popd
+ ls -lh $d
+ tar cjfv $d.tar.bz2 $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: speaker-segmentation-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-reverb-diarization-v1
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ cp -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-sense-voice-to-onnx.yaml b/.github/workflows/export-sense-voice-to-onnx.yaml
index 41a9a31a64..1c3e917296 100644
--- a/.github/workflows/export-sense-voice-to-onnx.yaml
+++ b/.github/workflows/export-sense-voice-to-onnx.yaml
@@ -66,7 +66,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-telespeech-ctc.yaml b/.github/workflows/export-telespeech-ctc.yaml
index 102c3884eb..4f66d7ca4b 100644
--- a/.github/workflows/export-telespeech-ctc.yaml
+++ b/.github/workflows/export-telespeech-ctc.yaml
@@ -60,7 +60,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
cp -a $src/* hf/
cd hf
git lfs track "*.pdf"
@@ -84,7 +84,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
rm -rf hf
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
cp -a $src/* hf/
cd hf
git lfs track "*.pdf"
diff --git a/.github/workflows/export-wenet-to-onnx.yaml b/.github/workflows/export-wenet-to-onnx.yaml
index 626f477e61..7ef3a54b64 100644
--- a/.github/workflows/export-wenet-to-onnx.yaml
+++ b/.github/workflows/export-wenet-to-onnx.yaml
@@ -49,7 +49,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface
cd huggingface
git fetch
git pull
@@ -98,7 +98,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface
cd huggingface
git fetch
git pull
@@ -147,7 +147,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface
cd huggingface
git fetch
git pull
@@ -196,7 +196,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface
cd huggingface
git fetch
git pull
@@ -245,7 +245,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface
cd huggingface
git fetch
git pull
@@ -295,7 +295,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-wespeaker-to-onnx.yaml b/.github/workflows/export-wespeaker-to-onnx.yaml
index fd167ab211..05694f693a 100644
--- a/.github/workflows/export-wespeaker-to-onnx.yaml
+++ b/.github/workflows/export-wespeaker-to-onnx.yaml
@@ -48,3 +48,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml
index a50aa99d74..53aebdd3b6 100644
--- a/.github/workflows/export-whisper-to-onnx.yaml
+++ b/.github/workflows/export-whisper-to-onnx.yaml
@@ -145,7 +145,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
rm -rf huggingface/*
diff --git a/.github/workflows/flutter-android.yaml b/.github/workflows/flutter-android.yaml
index 9752a82c6c..c2b1d01db1 100644
--- a/.github/workflows/flutter-android.yaml
+++ b/.github/workflows/flutter-android.yaml
@@ -214,7 +214,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-linux.yaml b/.github/workflows/flutter-linux.yaml
index b6b1fb9c84..f1fdd5ec71 100644
--- a/.github/workflows/flutter-linux.yaml
+++ b/.github/workflows/flutter-linux.yaml
@@ -261,7 +261,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-macos.yaml b/.github/workflows/flutter-macos.yaml
index 7c8a38e4c9..e85ff1644f 100644
--- a/.github/workflows/flutter-macos.yaml
+++ b/.github/workflows/flutter-macos.yaml
@@ -101,7 +101,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
@@ -207,7 +207,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-windows-x64.yaml b/.github/workflows/flutter-windows-x64.yaml
index f4d296b709..59f6a6af92 100644
--- a/.github/workflows/flutter-windows-x64.yaml
+++ b/.github/workflows/flutter-windows-x64.yaml
@@ -94,7 +94,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
@@ -192,7 +192,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/hap-vad-asr.yaml b/.github/workflows/hap-vad-asr.yaml
new file mode 100644
index 0000000000..9e64a9ab16
--- /dev/null
+++ b/.github/workflows/hap-vad-asr.yaml
@@ -0,0 +1,173 @@
+name: hap-vad-asr
+
+on:
+ push:
+ branches:
+ - hap
+ - hap-ci
+
+ workflow_dispatch:
+
+concurrency:
+ group: hap-vad-asr-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: write
+
+jobs:
+ hap_vad_asr:
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+ runs-on: ${{ matrix.os }}
+ name: HAPs for VAD ASR ${{ matrix.index }}/${{ matrix.total }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ total: ["10"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '17' # it requires JDK 17 to sign the hap
+
+ - name: Show java version
+ shell: bash
+ run: |
+ which java
+ java --version
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ python3 -m pip install --upgrade pip jinja2
+
+ - name: Generate build script
+ shell: bash
+ run: |
+ cd scripts/hap
+
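+ # generate a build script covering only this job's share (index/total) of the VAD+ASR HAPs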
+ total=${{ matrix.total }}
+ index=${{ matrix.index }}
+
+ ./generate-vad-asr-hap-script.py --total $total --index $index
+ ls -lh
+
+ chmod +x build-hap-vad-asr.sh
+ mv -v ./build-hap-vad-asr.sh ../..
+
+ - name: Generate secrets
+ shell: bash
+ run: |
+ echo "${{ secrets.HAP_SHERPA_ONNX_CER }}" > /tmp/sherpa_onnx.cer
+ shasum -a 256 /tmp/sherpa_onnx.cer
+ ls -lh /tmp/sherpa_onnx.cer
+
+ # macos
+ # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64
+ #
+ # linux
+ # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64
+ #
+ # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b
+ #
+ echo "${{ secrets.HAP_SHERPA_ONNX_PROFILE }}" | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b
+ echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" > ./sherpa_onnx_ohos_key.p12.base64
+ echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12
+
+ ls -l /tmp/sherpa_onnx_profileRelease.p7b
+ ls -l /tmp/sherpa_onnx_ohos_key.p12
+
+ ls -lh ./sherpa_onnx_ohos_key.p12.base64
+ shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64
+ wc ./sherpa_onnx_ohos_key.p12.base64
+ rm ./sherpa_onnx_ohos_key.p12.base64
+
+ shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b
+ shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12
+
+ - name: build HAP
+ env:
+ HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }}
+ HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }}
+ HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }}
+ shell: bash
+ run: |
+ export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools
+ ./build-hap-vad-asr.sh
+
+ # remove secrets
+ rm /tmp/sherpa_onnx.cer
+ rm /tmp/sherpa_onnx_profileRelease.p7b
+ rm /tmp/sherpa_onnx_ohos_key.p12
+
+ - name: Display HAPs
+ shell: bash
+ run: |
+ ls -lh ./haps/
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
+ cd huggingface
+ du -h -d1 .
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=hap/vad-asr/$SHERPA_ONNX_VERSION
+ mkdir -p $d
+ cp -v ../haps/*.hap $d/
+ git status
+ git lfs track "*.hap"
+ git add .
+ git commit -m "add more HAPs"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
diff --git a/.github/workflows/har.yaml b/.github/workflows/har.yaml
new file mode 100644
index 0000000000..7b5b2e5141
--- /dev/null
+++ b/.github/workflows/har.yaml
@@ -0,0 +1,214 @@
+name: har
+
+on:
+ push:
+ branches:
+ - master
+ # - ohos-har
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: har-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ har:
+ name: Har
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: har-linux
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+
+ echo "===="
+ cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+ echo "===="
+
+ # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH"
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
+ echo "--"
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*
+
+ cat $GITHUB_PATH
+
+ # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
+ export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
+ echo "path: $PATH"
+
+ which aarch64-unknown-linux-ohos-clang++ || true
+ which aarch64-unknown-linux-ohos-clang || true
+
+ aarch64-unknown-linux-ohos-clang++ --version || true
+ aarch64-unknown-linux-ohos-clang --version || true
+
+ which armv7-unknown-linux-ohos-clang++
+ which armv7-unknown-linux-ohos-clang
+
+ armv7-unknown-linux-ohos-clang++ --version
+ armv7-unknown-linux-ohos-clang --version
+
+ which x86_64-unknown-linux-ohos-clang++
+ which x86_64-unknown-linux-ohos-clang
+
+ x86_64-unknown-linux-ohos-clang++ --version
+ x86_64-unknown-linux-ohos-clang --version
+
+ - name: Install tree
+ shell: bash
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -y -q tree
+
+ - name: Build libraries
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"
+
+ ./build-ohos-arm64-v8a.sh
+ ./build-ohos-x86-64.sh
+
+ - name: Build Har
+ shell: bash
+ run: |
+ export PATH="$GITHUB_WORKSPACE/command-line-tools/bin:$PATH"
+
+ which hvigorw
+
+ pushd harmony-os/SherpaOnnxHar
+
+ cp -fv ../../LICENSE ./sherpa_onnx
+ cp -fv ../../CHANGELOG.md ./sherpa_onnx
+
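+ # assemble the HAR package with hvigorw and copy it to the repo root for the following steps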
+ hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon
+ ls -lh ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+ cp -v ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har ../../
+
+ popd
+
+ ls -lh *.har
+
+ - name: View Har
+ shell: bash
+ run: |
+ file sherpa_onnx.har
+ tar xvf sherpa_onnx.har
+
+ cd package
+ ls -lh
+
+ ls -lh libs
+ echo "---libs/x86_64---"
+ ls -lh libs/x86_64
+
+ echo "---libs/arm64-v8a---"
+ ls -lh libs/arm64-v8a
+
+ echo "---src/main/ets/components---"
+ ls -lh src/main/ets/components/
+
+ echo "---src/main/cpp/types/libsherpa_onnx/---"
+ ls -lh src/main/cpp/types/libsherpa_onnx/
+
+ tree .
+
+ - name: Collect result
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx.har sherpa_onnx-$SHERPA_ONNX_VERSION.har
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-har
+ path: ./sherpa_onnx*.har
+
+ - name: Release har
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.har
+ # repo_name: k2-fsa/sherpa-onnx
+ # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ # tag: v1.10.32
+
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=har
+ mkdir -p $d
+ cp -v ../*.har $d/
+ git status
+ git lfs track "*.har"
+ git add .
+ git commit -m "add more hars"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
diff --git a/.github/workflows/harmony-os.yaml b/.github/workflows/harmony-os.yaml
new file mode 100644
index 0000000000..e1a2ae1a2e
--- /dev/null
+++ b/.github/workflows/harmony-os.yaml
@@ -0,0 +1,159 @@
+name: harmony-os
+
+on:
+ push:
+ branches:
+ - master
+ - ohos
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: harmony-os-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ harmony_os:
+ name: Harmony OS ${{ matrix.arch }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ arch: [arm64-v8a, armeabi-v7a, x86_64]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ohos-${{ matrix.arch }}
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+
+ echo "===="
+ cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+ echo "===="
+
+ # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH"
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
+ echo "--"
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*
+
+ cat $GITHUB_PATH
+
+ # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
+ export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
+ echo "path: $PATH"
+
+ which aarch64-unknown-linux-ohos-clang++ || true
+ which aarch64-unknown-linux-ohos-clang || true
+
+ aarch64-unknown-linux-ohos-clang++ --version || true
+ aarch64-unknown-linux-ohos-clang --version || true
+
+ which armv7-unknown-linux-ohos-clang++
+ which armv7-unknown-linux-ohos-clang
+
+ armv7-unknown-linux-ohos-clang++ --version
+ armv7-unknown-linux-ohos-clang --version
+
+ which x86_64-unknown-linux-ohos-clang++
+ which x86_64-unknown-linux-ohos-clang
+
+ x86_64-unknown-linux-ohos-clang++ --version
+ x86_64-unknown-linux-ohos-clang --version
+
+ - name: Build ${{ matrix.arch }}
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ arch=${{ matrix.arch }}
+
+ echo "arch: $arch"
+
+ export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"
+
+ if [[ $arch == arm64-v8a ]]; then
+ ./build-ohos-arm64-v8a.sh
+ elif [[ $arch == armeabi-v7a ]]; then
+ ./build-ohos-armeabi-v7a.sh
+ elif [[ $arch == x86_64 ]]; then
+ ./build-ohos-x86-64.sh
+ else
+ echo "Unknown arch $arch"
+ fi
+
+ - name: Collect result for ${{ matrix.arch }}
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ arch=${{ matrix.arch }}
+ d=sherpa-onnx-$SHERPA_ONNX_VERSION-ohos-$arch
+ if [[ $arch == x86_64 ]]; then
+ cd ./build-ohos-x86-64
+ else
+ cd ./build-ohos-$arch
+ fi
+
+ mv install $d
+ tar cjfv $d.tar.bz2 $d
+
+ ls -lh $d/lib
+
+
+ file $d/lib/*
+
+ readelf -d $d/lib/libsherpa-onnx-c-api.so
+
+ mv $d.tar.bz2 ../
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-ohos-${{ matrix.arch }}
+ path: ./*.tar.bz2
+
+ - name: Release pre-compiled libs
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.tar.bz2
+ # repo_name: k2-fsa/sherpa-onnx
+ # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ # tag: v1.10.23
diff --git a/.github/workflows/jni.yaml b/.github/workflows/jni.yaml
index a0f7693937..3bce5cdcd6 100644
--- a/.github/workflows/jni.yaml
+++ b/.github/workflows/jni.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/jni.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'kotlin-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/jni.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'kotlin-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -75,3 +73,8 @@ jobs:
cd ./kotlin-api-examples
./run.sh
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: tts-files-${{ matrix.os }}
+ path: kotlin-api-examples/test-*.wav
diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml
index 11df536449..d28b7cba45 100644
--- a/.github/workflows/lazarus.yaml
+++ b/.github/workflows/lazarus.yaml
@@ -7,7 +7,6 @@ on:
- lazarus
paths:
- '.github/workflows/lazarus.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'lazarus-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/lazarus.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'lazarus-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -43,7 +41,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-20.04, macos-latest, macos-13, windows-latest]
+ os: [ubuntu-22.04, macos-latest, macos-13, windows-latest]
steps:
- uses: actions/checkout@v4
@@ -56,10 +54,10 @@ jobs:
key: ${{ matrix.os }}
# See https://github.com/gcarreno/setup-lazarus
- - uses: gcarreno/setup-lazarus@v3
+ - uses: gcarreno/setup-lazarus@v3.3.1
with:
lazarus-version: "stable"
- with-cache: true
+ with-cache: false
- name: Lazarus info
shell: bash
@@ -79,14 +77,14 @@ jobs:
uname -a
- name: Install patchelf for ubuntu
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
sudo apt-get update -q
sudo apt-get install -q -y patchelf
- name: Show Patchelf version (ubuntu)
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
patchelf --version
@@ -104,7 +102,7 @@ jobs:
cd build
os=${{ matrix.os }}
- if [[ $os == 'windows-latest' || $os == 'ubuntu-20.04' ]]; then
+ if [[ $os == 'windows-latest' || $os == 'ubuntu-22.04' ]]; then
BUILD_SHARED_LIBS=ON
else
BUILD_SHARED_LIBS=OFF
@@ -139,7 +137,7 @@ jobs:
lazbuild --verbose --build-mode=Release --widgetset=cocoa ./generate_subtitles.lpi
elif [[ $os == macos-latest ]]; then
lazbuild --verbose --build-mode=Release --widgetset=cocoa --cpu=aarch64 ./generate_subtitles.lpi
- elif [[ $os == 'ubuntu-20.04' ]]; then
+ elif [[ $os == 'ubuntu-22.04' ]]; then
lazbuild --verbose --build-mode=Release-Linux ./generate_subtitles.lpi
else
lazbuild --verbose --build-mode=Release ./generate_subtitles.lpi
@@ -152,7 +150,7 @@ jobs:
ls -lh
- name: Collect generating subtitles (Ubuntu)
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
@@ -223,7 +221,7 @@ jobs:
ls -lh /tmp/macos-*
- uses: actions/upload-artifact@v4
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
with:
name: linux-x64
path: /tmp/linux-x64
@@ -355,8 +353,9 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-bin huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin huggingface
cd huggingface
+ git remote set-url origin https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin
git fetch
git pull
git merge -m "merge remote" --ff origin main
diff --git a/.github/workflows/linux-gpu.yaml b/.github/workflows/linux-gpu.yaml
index 2a9d0529d7..c1a97aa730 100644
--- a/.github/workflows/linux-gpu.yaml
+++ b/.github/workflows/linux-gpu.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -31,7 +30,6 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/linux-jni-aarch64.yaml b/.github/workflows/linux-jni-aarch64.yaml
new file mode 100644
index 0000000000..19d1e09cf5
--- /dev/null
+++ b/.github/workflows/linux-jni-aarch64.yaml
@@ -0,0 +1,176 @@
+name: linux-jni-aarch64
+
+on:
+ push:
+ branches:
+ - jni
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+ workflow_dispatch:
+
+concurrency:
+ group: linux-jni-aarch64-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ linux-jni-aarch64:
+ name: linux jni aarch64
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ # java-version: ['8', '11', '16', '17', '21']
+ java-version: ['21']
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: ${{ matrix.java-version }}
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+
+ - name: Display PWD
+ shell: bash
+ run: |
+ echo "pwd: $PWD"
+ ls -lh
+ du -h -d1 .
+
+ - name: Build sherpa-onnx
+ if: matrix.java-version == '21'
+ uses: addnab/docker-run-action@v3
+ with:
+ image: quay.io/pypa/manylinux2014_aarch64
+ options: |
+ --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx
+ shell: bash
+ run: |
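+ # everything below runs inside the manylinux2014 aarch64 container (emulated via the QEMU step above)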
+ uname -a
+ gcc --version
+ cmake --version
+ cat /etc/*release
+ id
+ pwd
+
+ yum install -y java-11-openjdk-devel
+ java -version
+ which java
+ ls -lh $(which java)
+ ls -lrt /etc/alternatives/java
+
+ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-2.el7_9.aarch64
+ echo "JAVA_HOME: $JAVA_HOME"
+ find $JAVA_HOME -name jni.h
+
+ cd /home/runner/work/sherpa-onnx/sherpa-onnx
+
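+ # build alsa-lib from source and point the sherpa-onnx build at its headers and libraries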
+ git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
+ pushd alsa-lib
+ ./gitcompile
+ popd
+
+ export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
+ export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
+
+ mkdir build
+ cd build
+
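+ # shared-library JNI build; standalone binaries are disabled and the C API library is removed after install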
+ cmake \
+ -D SHERPA_ONNX_ENABLE_TTS=ON \
+ -D CMAKE_BUILD_TYPE=Release \
+ -D BUILD_SHARED_LIBS=ON \
+ -D CMAKE_INSTALL_PREFIX=./install \
+ -D SHERPA_ONNX_ENABLE_BINARY=OFF \
+ -D SHERPA_ONNX_ENABLE_JNI=ON \
+ ..
+
+ make -j2
+ make install
+
+ ls -lh lib
+ rm -rf ./install/lib/pkgconfig
+ rm -rf ./install/lib/share
+ rm -rf ./install/lib/cargs.h
+ rm -rf ./install/include/cargs.h
+ rm -rf ./install/lib/libcargs.so
+ rm -rf ./install/lib/libsherpa-onnx-c-api.so
+
+ echo "----"
+ ls -lh install/lib
+
+ echo "----"
+
+ - uses: actions/upload-artifact@v4
+ if: matrix.java-version == '21'
+ with:
+ name: release-jni-linux-${{ matrix.java-version }}
+ path: build/install/*
+
+ - name: Copy files
+ if: matrix.java-version == '21'
+ shell: bash
+ run: |
+ du -h -d1 .
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-jni
+ mkdir $dst
+
+ cp -a build/install/lib $dst/
+ cp -a build/install/include $dst/
+
+ tree $dst
+
+ tar cjvf ${dst}.tar.bz2 $dst
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && matrix.java-version == '21'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ mkdir -p jni
+
+ cp -v ../sherpa-onnx-*.tar.bz2 ./jni
+ cp -v ../*.jar ./jni
+
+ git status
+ git lfs track "*.bz2"
+
+ git add .
+
+ git commit -m "add more files"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release pre-compiled binaries and libs for linux aarch64
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.java-version == '21'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*.tar.bz2
+
diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml
index 0e1eca0990..ea3bd2b4a5 100644
--- a/.github/workflows/linux.yaml
+++ b/.github/workflows/linux.yaml
@@ -18,7 +18,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -38,7 +40,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -143,14 +147,34 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*
- - name: Test offline transducer
+ - name: Test offline TTS
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-tts
+
+ .github/scripts/test-offline-tts.sh
+ du -h -d1 .
+
+ - uses: actions/upload-artifact@v4
+ if: matrix.with_tts == 'ON'
+ with:
+ name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
+ path: tts
+
+ - name: Test offline Moonshine
+ if: matrix.build_type != 'Debug'
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
- .github/scripts/test-offline-transducer.sh
+ readelf -d build/bin/sherpa-onnx-offline
+
+ .github/scripts/test-offline-moonshine.sh
du -h -d1 .
- name: Test offline CTC
@@ -163,6 +187,37 @@ jobs:
.github/scripts/test-offline-ctc.sh
du -h -d1 .
+ - name: Test C++ API
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+ du -h -d1 .
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization
+
+ .github/scripts/test-speaker-diarization.sh
+
+ - name: Test offline transducer
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline
+
+ .github/scripts/test-offline-transducer.sh
+ du -h -d1 .
+
- name: Test online punctuation
shell: bash
run: |
@@ -269,16 +324,7 @@ jobs:
.github/scripts/test-offline-whisper.sh
du -h -d1 .
- - name: Test offline TTS
- if: matrix.with_tts == 'ON'
- shell: bash
- run: |
- du -h -d1 .
- export PATH=$PWD/build/bin:$PATH
- export EXE=sherpa-onnx-offline-tts
- .github/scripts/test-offline-tts.sh
- du -h -d1 .
- name: Test online paraformer
shell: bash
@@ -327,8 +373,4 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2
- - uses: actions/upload-artifact@v4
- with:
- name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
- path: tts
diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml
index 084531e4a6..813b8fd0eb 100644
--- a/.github/workflows/macos.yaml
+++ b/.github/workflows/macos.yaml
@@ -18,7 +18,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -37,7 +39,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -115,6 +119,45 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
+ - name: Test offline TTS
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-tts
+
+ .github/scripts/test-offline-tts.sh
+
+ - name: Test offline Moonshine
+ if: matrix.build_type != 'Debug'
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+ du -h -d1 .
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test offline transducer
shell: bash
run: |
@@ -190,15 +233,6 @@ jobs:
.github/scripts/test-kws.sh
- - name: Test offline TTS
- if: matrix.with_tts == 'ON'
- shell: bash
- run: |
- export PATH=$PWD/build/bin:$PATH
- export EXE=sherpa-onnx-offline-tts
-
- .github/scripts/test-offline-tts.sh
-
- name: Test online paraformer
shell: bash
run: |
@@ -216,8 +250,6 @@ jobs:
.github/scripts/test-offline-whisper.sh
-
-
- name: Test online transducer
shell: bash
run: |
@@ -235,11 +267,12 @@ jobs:
.github/scripts/test-online-transducer.sh
- name: Copy files
+ if: matrix.build_type == 'Release'
shell: bash
run: |
SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
- if [[ ${{ matrix.with_tts }} ]]; then
+ if [[ ${{ matrix.with_tts }} == ON ]]; then
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}
else
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts
@@ -256,7 +289,7 @@ jobs:
tar cjvf ${dst}.tar.bz2 $dst
- name: Release pre-compiled binaries and libs for macOS
- if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ if: matrix.build_type == 'Release' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
diff --git a/.github/workflows/mfc.yaml b/.github/workflows/mfc.yaml
index e501478a29..1315092c2d 100644
--- a/.github/workflows/mfc.yaml
+++ b/.github/workflows/mfc.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/mfc.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'mfc-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/mfc.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'mfc-examples/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml
index 2ed2131847..aed04e284f 100644
--- a/.github/workflows/pascal.yaml
+++ b/.github/workflows/pascal.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/pascal.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'pascal-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/pascal.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'pascal-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -127,6 +125,21 @@ jobs:
cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
fi
+ - name: Run Pascal test (Speaker diarization)
+ shell: bash
+ run: |
+ export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
+ cd ./pascal-api-examples
+ pushd speaker-diarization
+
+ ./run.sh
+ rm -rfv *.onnx *.wav sherpa-onnx-*
+ ls -lh
+ echo "---"
+
+ popd
+
- name: Run Pascal test (TTS)
shell: bash
run: |
@@ -137,6 +150,31 @@ jobs:
./run-piper.sh
rm -rf vits-piper-*
+ rm piper
+ ls -lh
+ echo "---"
+
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ rm kokoro-zh-en
+ ls -lh
+ echo "---"
+
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ rm kokoro-en
+ ls -lh
+ echo "---"
+
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+ rm matcha-zh
+ ls -lh
+ echo "---"
+
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ rm matcha-en
ls -lh
echo "---"
@@ -150,6 +188,10 @@ jobs:
cd ./pascal-api-examples
pushd vad-with-non-streaming-asr
+ time ./run-vad-with-moonshine.sh
+ rm -rf sherpa-onnx-*
+ echo "---"
+
time ./run-vad-with-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
@@ -205,6 +247,10 @@ jobs:
rm -rf sherpa-onnx-*
echo "---"
+ ./run-moonshine.sh
+ rm -rf sherpa-onnx-*
+ echo "---"
+
./run-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
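Note: each new Pascal step follows the same pattern as the existing ones: enter the example directory, run its run.sh (which downloads the models it needs), then delete the downloaded artifacts so the runner does not fill up. A condensed sketch for the speaker-diarization example on Linux/macOS; the extra PATH export for the Lazarus/FPC toolchain in the workflow is only needed on the Windows runner:

    cd pascal-api-examples/speaker-diarization

    ./run.sh                              # downloads the models and runs the example
    rm -rfv *.onnx *.wav sherpa-onnx-*    # free disk space afterwards
    ls -lh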
diff --git a/.github/workflows/pkg-config.yaml b/.github/workflows/pkg-config.yaml
index 57ed8a21a7..48ef160ba1 100644
--- a/.github/workflows/pkg-config.yaml
+++ b/.github/workflows/pkg-config.yaml
@@ -10,7 +10,6 @@ on:
paths:
- '.github/workflows/pkg-config.yaml'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -21,7 +20,6 @@ on:
paths:
- '.github/workflows/pkg-config.yaml'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/release-dart-package.yaml b/.github/workflows/release-dart-package.yaml
index f590403fea..cc830e2c2c 100644
--- a/.github/workflows/release-dart-package.yaml
+++ b/.github/workflows/release-dart-package.yaml
@@ -481,11 +481,8 @@ jobs:
- name: Copy pre-built libs
shell: bash
run: |
- echo "----ios-arm64----"
- cp -v build-ios-shared/ios-arm64/libsherpa-onnx-c-api.dylib flutter/sherpa_onnx_ios/ios/
- cp -v build-ios-shared/ios-onnxruntime/onnxruntime.xcframework/ios-arm64/onnxruntime.a flutter/sherpa_onnx_ios/ios/libonnxruntime.a
-
- ls -lh flutter/sherpa_onnx_ios/ios/libonnxruntime.a
+ echo "----ios arm64 and arm64_x64_simulator----"
+ cp -av build-ios-shared/sherpa_onnx.xcframework flutter/sherpa_onnx_ios/ios/
mv -v flutter/sherpa_onnx_ios /tmp/to_be_published
diff --git a/.github/workflows/riscv64-linux.yaml b/.github/workflows/riscv64-linux.yaml
index e3e5e8b198..f81d5cb2ec 100644
--- a/.github/workflows/riscv64-linux.yaml
+++ b/.github/workflows/riscv64-linux.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/riscv64-linux.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/riscv64-linux.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'toolchains/riscv64-linux-gnu.toolchain.cmake'
diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml
index 3e932707cc..ed5901e798 100644
--- a/.github/workflows/run-java-test.yaml
+++ b/.github/workflows/run-java-test.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/run-java-test.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'java-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/run-java-test.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'java-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -107,6 +105,38 @@ jobs:
make -j4
ls -lh lib
+ - name: Run java test (Non-Streaming ASR)
+ shell: bash
+ run: |
+ cd ./java-api-examples
+
+ ./run-non-streaming-decode-file-moonshine.sh
+ rm -rf sherpa-onnx-moonshine-*
+
+ ./run-non-streaming-decode-file-sense-voice.sh
+ rm -rf sherpa-onnx-sense-voice-*
+
+ ./run-inverse-text-normalization-paraformer.sh
+
+ ./run-non-streaming-decode-file-paraformer.sh
+ rm -rf sherpa-onnx-paraformer-zh-*
+
+ ./run-non-streaming-decode-file-transducer.sh
+ rm -rf sherpa-onnx-zipformer-*
+
+ ./run-non-streaming-decode-file-whisper.sh
+ rm -rf sherpa-onnx-whisper-*
+
+ ./run-non-streaming-decode-file-nemo.sh
+ rm -rf sherpa-onnx-nemo-*
+
+ - name: Run java test (speaker diarization)
+ shell: bash
+ run: |
+ cd ./java-api-examples
+ ./run-offline-speaker-diarization.sh
+ rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
+
- name: Run java test (kws)
shell: bash
run: |
@@ -199,32 +229,23 @@ jobs:
./run-streaming-decode-file-transducer.sh
rm -rf sherpa-onnx-streaming-*
- - name: Run java test (Non-Streaming ASR)
+ - name: Run java test (Non-Streaming TTS)
shell: bash
run: |
cd ./java-api-examples
- ./run-non-streaming-decode-file-sense-voice.sh
- rm -rf sherpa-onnx-sense-voice-*
-
- ./run-inverse-text-normalization-paraformer.sh
-
- ./run-non-streaming-decode-file-paraformer.sh
- rm -rf sherpa-onnx-paraformer-zh-*
-
- ./run-non-streaming-decode-file-transducer.sh
- rm -rf sherpa-onnx-zipformer-*
+ ./run-non-streaming-tts-kokoro-zh-en.sh
+ ./run-non-streaming-tts-kokoro-en.sh
+ ./run-non-streaming-tts-matcha-zh.sh
+ ./run-non-streaming-tts-matcha-en.sh
+ ls -lh
- ./run-non-streaming-decode-file-whisper.sh
- rm -rf sherpa-onnx-whisper-*
+ rm -rf kokoro-multi-*
+ rm -rf kokoro-en-*
- ./run-non-streaming-decode-file-nemo.sh
- rm -rf sherpa-onnx-nemo-*
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
- - name: Run java test (Non-Streaming TTS)
- shell: bash
- run: |
- cd ./java-api-examples
./run-non-streaming-tts-piper-en.sh
rm -rf vits-piper-*
diff --git a/.github/workflows/run-python-test-macos.yaml b/.github/workflows/run-python-test-macos.yaml
index ed51379d2e..c9fafe68a3 100644
--- a/.github/workflows/run-python-test-macos.yaml
+++ b/.github/workflows/run-python-test-macos.yaml
@@ -7,7 +7,6 @@ on:
paths:
- '.github/workflows/run-python-test-macos.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -17,7 +16,6 @@ on:
paths:
- '.github/workflows/run-python-test-macos.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -54,6 +52,9 @@ jobs:
- os: macos-latest
python-version: "3.12"
+ - os: macos-latest
+ python-version: "3.13"
+
steps:
- uses: actions/checkout@v4
with:
diff --git a/.github/workflows/run-python-test.yaml b/.github/workflows/run-python-test.yaml
index 80fa86a746..7080420f94 100644
--- a/.github/workflows/run-python-test.yaml
+++ b/.github/workflows/run-python-test.yaml
@@ -7,7 +7,6 @@ on:
paths:
- '.github/workflows/run-python-test.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -17,7 +16,6 @@ on:
paths:
- '.github/workflows/run-python-test.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -53,6 +51,8 @@ jobs:
python-version: "3.11"
- os: ubuntu-22.04
python-version: "3.12"
+ - os: ubuntu-22.04
+ python-version: "3.13"
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/sanitizer.yaml b/.github/workflows/sanitizer.yaml
index 7fce3834a1..7cda968990 100644
--- a/.github/workflows/sanitizer.yaml
+++ b/.github/workflows/sanitizer.yaml
@@ -76,6 +76,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+
- name: Test online punctuation
shell: bash
run: |
@@ -109,7 +118,6 @@ jobs:
.github/scripts/test-online-ctc.sh
-
- name: Test C API
shell: bash
run: |
diff --git a/.github/workflows/speaker-diarization.yaml b/.github/workflows/speaker-diarization.yaml
index 0bd6a575ca..ab2a4f0904 100644
--- a/.github/workflows/speaker-diarization.yaml
+++ b/.github/workflows/speaker-diarization.yaml
@@ -67,7 +67,7 @@ jobs:
curl -SL -O https://huggingface.co/csukuangfj/pyannote-models/resolve/main/segmentation-3.0/pytorch_model.bin
test_wavs=(
- 0-two-speakers-zh.wav
+ 0-four-speakers-zh.wav
1-two-speakers-en.wav
2-two-speakers-en.wav
3-two-speakers-en.wav
diff --git a/.github/workflows/swift.yaml b/.github/workflows/swift.yaml
index 3176c9b313..35bb7ab36d 100644
--- a/.github/workflows/swift.yaml
+++ b/.github/workflows/swift.yaml
@@ -4,10 +4,11 @@ on:
push:
branches:
- master
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- './build-swift-macos.sh'
- '.github/workflows/swift.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'swift-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -20,7 +21,6 @@ on:
paths:
- './build-swift-macos.sh'
- '.github/workflows/swift.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'swift-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -65,6 +65,30 @@ jobs:
./build-swift-macos.sh
+ - name: Copy files
+ if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static
+ mkdir $dst
+
+ mv -v build-swift-macos/sherpa-onnx.xcframework $dst
+
+ brew install tree
+ tree $dst
+
+ tar cjvf ${dst}.tar.bz2 $dst
+
+ - name: Release pre-compiled binaries and libs for macOS
+ if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*macos-xcframework-static.tar.bz2
+
- name: test
shell: bash
run: |
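Note: the new "Copy files" step in swift.yaml (like the macOS and WASM release steps elsewhere in this patch) derives the release version from CMakeLists.txt with a grep/cut pipeline. A small sketch of what that pipeline yields, assuming the set(SHERPA_ONNX_VERSION "...") line updated later in this patch is the matching line:

    # CMakeLists.txt contains: set(SHERPA_ONNX_VERSION "1.10.42")
    # Splitting on spaces, field 2 is "1.10.42"); splitting that on '"', field 2 is 1.10.42.
    SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
    echo $SHERPA_ONNX_VERSION    # prints: v1.10.42

    dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static
    mkdir $dst
    mv -v build-swift-macos/sherpa-onnx.xcframework $dst
    tar cjvf ${dst}.tar.bz2 $dst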
diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml
index a9b2db5892..d9c863160a 100644
--- a/.github/workflows/test-build-wheel.yaml
+++ b/.github/workflows/test-build-wheel.yaml
@@ -7,7 +7,6 @@ on:
paths:
- 'setup.py'
- '.github/workflows/test-build-wheel.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -17,7 +16,6 @@ on:
paths:
- 'setup.py'
- '.github/workflows/test-build-wheel.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -139,7 +137,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
- export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH
which sherpa-onnx
sherpa-onnx --help
diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml
index 58d5054902..d9e27e86fe 100644
--- a/.github/workflows/test-dart.yaml
+++ b/.github/workflows/test-dart.yaml
@@ -114,6 +114,7 @@ jobs:
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
+ cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
diff --git a/.github/workflows/test-dot-net-nuget.yaml b/.github/workflows/test-dot-net-nuget.yaml
index d325824414..b89781be56 100644
--- a/.github/workflows/test-dot-net-nuget.yaml
+++ b/.github/workflows/test-dot-net-nuget.yaml
@@ -75,10 +75,10 @@ jobs:
run: |
df -h
- - name: Setup .NET 6.0
+ - name: Setup .NET 8.0
uses: actions/setup-dotnet@v4
with:
- dotnet-version: 6.0.x
+ dotnet-version: 8.0.x
- name: Check dotnet
run: dotnet --info
diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index 6e32b155ec..9b46b64d97 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-dot-net.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'dotnet-examples/**'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/test-dot-net.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'dotnet-examples/**'
@@ -47,8 +45,57 @@ jobs:
with:
fetch-depth: 0
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-dotnet-release-shared
+
+ - name: Build sherpa-onnx
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ mkdir build
+ cd build
+ cmake \
+ -DBUILD_SHARED_LIBS=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ ..
+
+ cmake --build . --target install --config Release
+
+ rm -rf install/share
+ rm -rf install/lib/pkg*
+
+ ls -lh ./install/lib
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: ${{ matrix.os }}
+ path: ./build/install/lib/
+
+ test-dot-net:
+ runs-on: ${{ matrix.os }}
+ needs: [build-libs]
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ python-version: ["3.8"]
+
+ steps:
+ - name: Check space
+ shell: bash
+ run: |
+ df -h
+
- name: Free space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
df -h
@@ -56,7 +103,6 @@ jobs:
df -h
- name: Free more space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
# https://github.com/orgs/community/discussions/25678
@@ -68,7 +114,6 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Free Disk Space (Ubuntu)
- if: matrix.os == 'ubuntu-latest'
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
@@ -85,51 +130,10 @@ jobs:
swap-storage: true
- name: Check space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
df -h
- - name: ccache
- uses: hendrikmuhs/ccache-action@v1.2
- with:
- key: ${{ matrix.os }}-release-shared
-
- - name: Build sherpa-onnx
- shell: bash
- run: |
- export CMAKE_CXX_COMPILER_LAUNCHER=ccache
- export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
- cmake --version
-
- mkdir build
- cd build
- cmake \
- -DBUILD_SHARED_LIBS=ON \
- -DCMAKE_INSTALL_PREFIX=./install \
- -DCMAKE_BUILD_TYPE=Release \
- -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
- -DBUILD_ESPEAK_NG_EXE=OFF \
- -DSHERPA_ONNX_ENABLE_BINARY=ON \
- ..
-
- cmake --build . --target install --config Release
-
- - uses: actions/upload-artifact@v4
- with:
- name: ${{ matrix.os }}
- path: ./build/install/lib/
-
- test-dot-net:
- runs-on: ${{ matrix.os }}
- needs: [build-libs]
- strategy:
- fail-fast: false
- matrix:
- os: [ubuntu-latest]
- python-version: ["3.8"]
-
- steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -148,13 +152,12 @@ jobs:
uses: actions/download-artifact@v4
with:
name: ubuntu-latest
- path: /tmp/linux
+ path: /tmp/linux-x64
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
- dotnet-version: |
- 6.0.x
+ dotnet-version: 8.0.x
- name: Check dotnet
run: dotnet --info
@@ -162,17 +165,21 @@ jobs:
- name: Display files
shell: bash
run: |
- echo "----------/tmp/----------"
- ls -lh /tmp/
+ echo "----------/tmp----------"
+ ls -lh /tmp
- echo "----------/tmp/linux----------"
- ls -lh /tmp/linux
+ echo "----------/tmp/linux-x64----------"
+ ls -lh /tmp/linux-x64
+ df -h
- name: Build
shell: bash
run: |
cd scripts/dotnet
./run.sh
+ df -h
+
+ ls -lh /tmp/packages
- name: Copy files
shell: bash
@@ -181,9 +188,14 @@ jobs:
ls -lh /tmp
+ df -h
+
- name: Run tests
shell: bash
run: |
+ dotnet nuget locals all --clear
+ df -h
+
.github/scripts/test-dot-net.sh
- uses: actions/upload-artifact@v4
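Note: build-libs now produces only the shared libraries; websocket support, the espeak-ng executable, and the command-line binaries are switched off, and install/share plus the pkg-config files are removed, so the uploaded artifact contains little more than the libraries the .NET tests need. A condensed sketch of that configure/install sequence:

    mkdir -p build && cd build
    cmake \
      -DBUILD_SHARED_LIBS=ON \
      -DCMAKE_INSTALL_PREFIX=./install \
      -DCMAKE_BUILD_TYPE=Release \
      -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
      -DBUILD_ESPEAK_NG_EXE=OFF \
      -DSHERPA_ONNX_ENABLE_BINARY=OFF \
      ..

    cmake --build . --target install --config Release

    rm -rf install/share install/lib/pkg*    # trim the artifact before upload
    ls -lh ./install/lib                     # this directory is what gets uploaded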
diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml
index 2634e5ca75..f2e4cb1bc3 100644
--- a/.github/workflows/test-go-package.yaml
+++ b/.github/workflows/test-go-package.yaml
@@ -68,6 +68,64 @@ jobs:
run: |
gcc --version
+ - name: Test Keyword spotting
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/keyword-spotting-from-file/
+ ./run.sh
+
+ - name: Test adding punctuation
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/add-punctuation/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+ go mod tidy
+ cat go.mod
+ go build
+
+ echo $PWD
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+ cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
+
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+
+ go env GOARCH
+ go env -w GOARCH=386
+ go env -w CGO_ENABLED=1
+
+ go mod tidy
+ cat go.mod
+ go build
+
+ echo $PWD
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+ cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
+
+ ./run.sh
+
- name: Test streaming HLG decoding (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
@@ -151,6 +209,25 @@ jobs:
go build
ls -lh
+ echo "Test kokoro zh+en"
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ ls -lh
+
+ echo "Test kokoro en"
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ ls -lh
+
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
@@ -188,6 +265,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
ls -lh
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
@@ -233,6 +319,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
ls -lh
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
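Note: on the Windows runners the Go examples need the pre-built sherpa-onnx DLLs next to the executable, so the new steps copy them out of the Go module cache after `go build`; the x86 variant additionally forces a 32-bit, cgo-enabled build. A sketch of the 32-bit case; the module-cache path below is the one used on the GitHub runner and will differ on a local machine:

    cd go-api-examples/non-streaming-speaker-diarization/

    # Force a 32-bit build with cgo enabled (required by the sherpa-onnx bindings).
    go env -w GOARCH=386
    go env -w CGO_ENABLED=1

    go mod tidy
    go build

    # Place the matching 32-bit DLLs next to the executable, then run the example.
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
    ./run.sh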
diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml
index 65c72e1741..8d68076d71 100644
--- a/.github/workflows/test-go.yaml
+++ b/.github/workflows/test-go.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-go.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'go-api-examples/**'
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/test-go.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'go-api-examples/**'
@@ -134,53 +132,15 @@ jobs:
name: ${{ matrix.os }}-libs
path: to-upload/
- - name: Test speaker identification
+ - name: Test Keyword spotting
shell: bash
run: |
- cd scripts/go/_internal/speaker-identification/
- ./run.sh
+ cd scripts/go/_internal/keyword-spotting-from-file/
- - name: Test streaming HLG decoding
- shell: bash
- run: |
- cd scripts/go/_internal/streaming-hlg-decoding/
./run.sh
- - name: Test non-streaming TTS
- shell: bash
- run: |
- mkdir tts-waves
-
- cd scripts/go/_internal/non-streaming-tts/
- ls -lh
- go mod tidy
- cat go.mod
- go build
ls -lh
- echo "Test vits-ljs"
- ./run-vits-ljs.sh
- rm -rf vits-ljs
-
- echo "Test vits-vctk"
- ./run-vits-vctk.sh
- rm -rf vits-vctk
-
- echo "Test vits-zh-aishell3"
- ./run-vits-zh-aishell3.sh
- rm -rf vits-icefall-zh-aishell3
-
- echo "Test vits-piper-en_US-lessac-medium"
- ./run-vits-piper-en_US-lessac-medium.sh
- rm -rf vits-piper-en_US-lessac-medium
-
- cp *.wav ../../../../tts-waves/
-
- - uses: actions/upload-artifact@v4
- with:
- name: tts-waves-${{ matrix.os }}
- path: tts-waves
-
- name: Test non-streaming decoding files
shell: bash
run: |
@@ -191,6 +151,10 @@ jobs:
go build
ls -lh
+ echo "Test Moonshine"
+ ./run-moonshine.sh
+ rm -rf sherpa-onnx-*
+
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
@@ -224,6 +188,84 @@ jobs:
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
+ - name: Test adding punctuation
+ shell: bash
+ run: |
+ cd scripts/go/_internal/add-punctuation/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ shell: bash
+ run: |
+ cd scripts/go/_internal/non-streaming-speaker-diarization/
+ ./run.sh
+
+ - name: Test speaker identification
+ shell: bash
+ run: |
+ cd scripts/go/_internal/speaker-identification/
+ ./run.sh
+
+ - name: Test streaming HLG decoding
+ shell: bash
+ run: |
+ cd scripts/go/_internal/streaming-hlg-decoding/
+ ./run.sh
+
+ - name: Test non-streaming TTS
+ shell: bash
+ run: |
+ mkdir tts-waves
+
+ cd scripts/go/_internal/non-streaming-tts/
+ ls -lh
+ go mod tidy
+ cat go.mod
+ go build
+ ls -lh
+
+ echo "Test kokoro zh+en"
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ ls -lh
+
+ echo "Test kokoro en"
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ ls -lh
+
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
+ echo "Test vits-ljs"
+ ./run-vits-ljs.sh
+ rm -rf vits-ljs
+
+ echo "Test vits-vctk"
+ ./run-vits-vctk.sh
+ rm -rf vits-vctk
+
+ echo "Test vits-zh-aishell3"
+ ./run-vits-zh-aishell3.sh
+ rm -rf vits-icefall-zh-aishell3
+
+ echo "Test vits-piper-en_US-lessac-medium"
+ ./run-vits-piper-en_US-lessac-medium.sh
+ rm -rf vits-piper-en_US-lessac-medium
+
+ cp *.wav ../../../../tts-waves/
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: tts-waves-${{ matrix.os }}
+ path: tts-waves
+
- name: Test streaming decoding files
shell: bash
run: |
diff --git a/.github/workflows/test-nodejs-addon-api.yaml b/.github/workflows/test-nodejs-addon-api.yaml
index 224fc0f0b6..539025c8c5 100644
--- a/.github/workflows/test-nodejs-addon-api.yaml
+++ b/.github/workflows/test-nodejs-addon-api.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
index 07ab8d8781..232f8fe27c 100644
--- a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
+++ b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-aarch64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-aarch64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
index 98cba9dec1..0a21630dea 100644
--- a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
+++ b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-win-x86.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -20,7 +19,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-win-x86.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm.yaml b/.github/workflows/test-nodejs-addon-npm.yaml
index 27a962357d..0e2b9f55fd 100644
--- a/.github/workflows/test-nodejs-addon-npm.yaml
+++ b/.github/workflows/test-nodejs-addon-npm.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-npm.yaml b/.github/workflows/test-nodejs-npm.yaml
index cc49ac0c40..e1358fd8d2 100644
--- a/.github/workflows/test-nodejs-npm.yaml
+++ b/.github/workflows/test-nodejs-npm.yaml
@@ -26,7 +26,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
- python-version: ["3.8"]
+ python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml
index 25f3c38fdc..78788ad047 100644
--- a/.github/workflows/test-nodejs.yaml
+++ b/.github/workflows/test-nodejs.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml
index 0f73e3643d..139e09a0e7 100644
--- a/.github/workflows/test-pip-install.yaml
+++ b/.github/workflows/test-pip-install.yaml
@@ -42,6 +42,8 @@ jobs:
python-version: "3.11"
- os: ubuntu-22.04
python-version: "3.12"
+ - os: ubuntu-22.04
+ python-version: "3.13"
- os: macos-12
python-version: "3.8"
@@ -55,6 +57,8 @@ jobs:
- os: macos-14
python-version: "3.12"
+ - os: macos-14
+ python-version: "3.13"
- os: windows-2019
python-version: "3.7"
@@ -69,6 +73,8 @@ jobs:
python-version: "3.11"
- os: windows-2022
python-version: "3.12"
+ - os: windows-2022
+ python-version: "3.13"
steps:
- uses: actions/checkout@v4
@@ -104,7 +110,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
- export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH
sherpa-onnx --help
sherpa-onnx-keyword-spotter --help
diff --git a/.github/workflows/test-piper-phonemize.yaml b/.github/workflows/test-piper-phonemize.yaml
index 1edbae6d2c..744095411d 100644
--- a/.github/workflows/test-piper-phonemize.yaml
+++ b/.github/workflows/test-piper-phonemize.yaml
@@ -5,7 +5,6 @@ on:
- master
paths:
- '.github/workflows/test-piper-phonemize.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -13,7 +12,6 @@ on:
- master
paths:
- '.github/workflows/test-piper-phonemize.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/test-python-offline-websocket-server.yaml b/.github/workflows/test-python-offline-websocket-server.yaml
index 52a22ee5ae..4fa98464c5 100644
--- a/.github/workflows/test-python-offline-websocket-server.yaml
+++ b/.github/workflows/test-python-offline-websocket-server.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-python-offline-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -15,7 +14,6 @@ on:
- master
paths:
- '.github/workflows/test-python-offline-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
diff --git a/.github/workflows/test-python-online-websocket-server.yaml b/.github/workflows/test-python-online-websocket-server.yaml
index badf343a0a..d22e93002a 100644
--- a/.github/workflows/test-python-online-websocket-server.yaml
+++ b/.github/workflows/test-python-online-websocket-server.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-python-online-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -15,7 +14,6 @@ on:
- master
paths:
- '.github/workflows/test-python-online-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
diff --git a/.github/workflows/wasm-simd-hf-space-de-tts.yaml b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
index cbd3b1fce6..76013291b7 100644
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
index 510a003c7a..d34a182d41 100644
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
@@ -28,7 +28,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-en-tts.yaml b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
index 9c5c1d4469..d67ae88181 100644
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
index dc8bada704..81052cac84 100644
--- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
+++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml
new file mode 100644
index 0000000000..14301f9f06
--- /dev/null
+++ b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml
@@ -0,0 +1,167 @@
+name: wasm-simd-hf-space-speaker-diarization
+
+on:
+ push:
+ branches:
+ - wasm
+ - wasm-speaker-diarization
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: wasm-simd-hf-space-speaker-diarization-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ wasm-simd-hf-space-speaker-diarization:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Install emsdk
+ uses: mymindstorm/setup-emsdk@v14
+ with:
+ version: 3.1.53
+ actions-cache-folder: 'emsdk-cache'
+
+ - name: View emsdk version
+ shell: bash
+ run: |
+ emcc -v
+ echo "--------------------"
+ emcc --check
+
+ - name: Download model files
+ shell: bash
+ run: |
+ cd wasm/speaker-diarization/assets/
+ ls -lh
+ echo "----------"
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
+ rm -rf sherpa-onnx-pyannote-segmentation-3-0
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+ mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx
+
+ echo "----------"
+
+ ls -lh
+
+ - name: Build sherpa-onnx for WebAssembly
+ shell: bash
+ run: |
+ ./build-wasm-simd-speaker-diarization.sh
+
+ - name: collect files
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speaker-diarization
+ mv build-wasm-simd-speaker-diarization/install/bin/wasm/speaker-diarization $dst
+ ls -lh $dst
+ tar cjfv $dst.tar.bz2 ./$dst
+
+ - name: Upload wasm files
+ uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-wasm-simd-speaker-diarization
+ path: ./sherpa-onnx-wasm-simd-*.tar.bz2
+
+ - name: Release
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.tar.bz2
+
+ - name: Publish to ModelScope
+ # if: false
+ env:
+ MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
+ uses: nick-fields/retry@v2
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf ms
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git ms
+ cd ms
+ rm -fv *.js
+ rm -fv *.data
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+ git status
+ git lfs track "*.data"
+ git lfs track "*.wasm"
+ ls -lh
+
+ git add .
+ git commit -m "update model"
+ git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v2
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx huggingface
+ ls -lh
+
+ cd huggingface
+ rm -fv *.js
+ rm -fv *.data
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+ git status
+ git lfs track "*.data"
+ git lfs track "*.wasm"
+ ls -lh
+
+ git add .
+ git commit -m "update model"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx main
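Note: the WASM speaker-diarization build expects two renamed models in wasm/speaker-diarization/assets/ before ./build-wasm-simd-speaker-diarization.sh runs: the pyannote segmentation model as segmentation.onnx and a speaker-embedding model as embedding.onnx. A condensed sketch of that preparation step (URLs copied verbatim from the workflow above, including the spelling of the release tag):

    cd wasm/speaker-diarization/assets/

    # Segmentation model -> segmentation.onnx
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
    rm -rf sherpa-onnx-pyannote-segmentation-3-0 sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

    # Speaker embedding model -> embedding.onnx
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
    mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx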
diff --git a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
index c093f0fe99..18c1c1d607 100644
--- a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
+++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
@@ -37,7 +37,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
index c72e0cef29..02a328a9bd 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
index b76f912b47..1a72be6ab4 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
index 9bdd90ee24..8b7c2029f7 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/windows-arm64.yaml b/.github/workflows/windows-arm64.yaml
index a6d2a96da2..b6ab5bf7e9 100644
--- a/.github/workflows/windows-arm64.yaml
+++ b/.github/workflows/windows-arm64.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/windows-arm64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/windows-arm64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -34,7 +32,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest]
- shared_lib: [ON]
+ shared_lib: [ON, OFF]
with_tts: [ON, OFF]
steps:
diff --git a/.github/workflows/windows-x64-cuda.yaml b/.github/workflows/windows-x64-cuda.yaml
index fd45704558..0d15af946b 100644
--- a/.github/workflows/windows-x64-cuda.yaml
+++ b/.github/workflows/windows-x64-cuda.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x64-debug.yaml b/.github/workflows/windows-x64-debug.yaml
index 09f93fd0d0..7abf022853 100644
--- a/.github/workflows/windows-x64-debug.yaml
+++ b/.github/workflows/windows-x64-debug.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml
index 2d2811c31e..76dd426238 100644
--- a/.github/workflows/windows-x64.yaml
+++ b/.github/workflows/windows-x64.yaml
@@ -17,7 +17,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -34,7 +36,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -87,6 +91,32 @@ jobs:
name: release-windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}
path: build/install/*
+ - name: Test offline Moonshine for windows x64
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline.exe
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
+ export CXX_WHISPER_EXE=whisper-cxx-api.exe
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
+
+ .github/scripts/test-cxx-api.sh
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization.exe
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test online punctuation
shell: bash
run: |
diff --git a/.github/workflows/windows-x86-debug.yaml b/.github/workflows/windows-x86-debug.yaml
index f72bf25664..59d9ef3707 100644
--- a/.github/workflows/windows-x86-debug.yaml
+++ b/.github/workflows/windows-x86-debug.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml
index 316cef6265..f1498c0c0c 100644
--- a/.github/workflows/windows-x86.yaml
+++ b/.github/workflows/windows-x86.yaml
@@ -17,7 +17,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -34,7 +36,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -87,6 +91,32 @@ jobs:
name: release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }}
path: build/install/*
+ - name: Test offline Moonshine for windows x86
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline.exe
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
+ export CXX_WHISPER_EXE=whisper-cxx-api.exe
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
+
+ .github/scripts/test-cxx-api.sh
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization.exe
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test online punctuation
shell: bash
run: |
diff --git a/.gitignore b/.gitignore
index b0fbfae781..ea356b0652 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,16 @@ vits-melo-tts-zh_en
sherpa-onnx-online-punct-en-2024-08-06
*.mp4
*.mp3
+sherpa-onnx-pyannote-segmentation-3-0
+sherpa-onnx-moonshine-tiny-en-int8
+sherpa-onnx-moonshine-base-en-int8
+harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
+harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
+matcha-icefall-zh-baker
+matcha-icefall-en_US-ljspeech
+kokoro-en-v0_19
+*.pt
+lexicon.txt
+us_gold.json
+us_silver.json
+kokoro-multi-lang-v1_0
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7af4a3f67b..4317d83976 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,233 @@
+## 1.10.42
+
+* Fix publishing wheels (#1746)
+* Update README to include https://github.com/xinhecuican/QSmartAssistant (#1755)
+* Add Kokoro TTS to MFC examples (#1760)
+* Refactor node-addon C++ code. (#1768)
+* Add keyword spotter C API for HarmonyOS (#1769)
+* Add ArkTS API for Keyword spotting. (#1775)
+* Add Flutter example for Kokoro TTS (#1776)
+* Initialize the audio session for iOS ASR example (#1786)
+* Fix: Prepend 0 to tokenization to prevent word skipping for Kokoro. (#1787)
+* Export Kokoro 1.0 to sherpa-onnx (#1788)
+* Add C++ and Python API for Kokoro 1.0 multilingual TTS model (#1795)
+* Add Java and Kotlin API for Kokoro TTS 1.0 (#1798)
+* Add Android demo for Kokoro TTS 1.0 (#1799)
+* Add C API for Kokoro TTS 1.0 (#1801)
+* Add CXX API for Kokoro TTS 1.0 (#1802)
+* Add Swift API for Kokoro TTS 1.0 (#1803)
+* Add Go API for Kokoro TTS 1.0 (#1804)
+* Add C# API for Kokoro TTS 1.0 (#1805)
+* Add Dart API for Kokoro TTS 1.0 (#1806)
+* Add Pascal API for Kokoro TTS 1.0 (#1807)
+* Add JavaScript API (node-addon) for Kokoro TTS 1.0 (#1808)
+* Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809)
+* Add Flutter example for Kokoro TTS 1.0 (#1810)
+* Add iOS demo for Kokoro TTS 1.0 (#1812)
+* Add HarmonyOS demo for Kokoro TTS 1.0 (#1813)
+
+## 1.10.41
+
+* Fix UI for Android TTS Engine. (#1735)
+* Add iOS TTS example for MatchaTTS (#1736)
+* Add iOS example for Kokoro TTS (#1737)
+* Fix dither binding in Pybind11 to ensure independence from high_freq in FeatureExtractorConfig (#1739)
+* Fix keyword spotting. (#1689)
+* Update readme to include https://github.com/hfyydd/sherpa-onnx-server (#1741)
+* Reduce vad-moonshine-c-api example code. (#1742)
+* Support Kokoro TTS for HarmonyOS. (#1743)
+
+## 1.10.40
+
+* Fix building wheels (#1703)
+* Export kokoro to sherpa-onnx (#1713)
+* Add C++ and Python API for Kokoro TTS models. (#1715)
+* Add C API for Kokoro TTS models (#1717)
+* Fix style issues (#1718)
+* Add C# API for Kokoro TTS models (#1720)
+* Add Swift API for Kokoro TTS models (#1721)
+* Add Go API for Kokoro TTS models (#1722)
+* Add Dart API for Kokoro TTS models (#1723)
+* Add Pascal API for Kokoro TTS models (#1724)
+* Add JavaScript API (node-addon) for Kokoro TTS models (#1725)
+* Add JavaScript (WebAssembly) API for Kokoro TTS models. (#1726)
+* Add Kotlin and Java API for Kokoro TTS models (#1728)
+* Update README.md for KWS to not use git lfs. (#1729)
+
+
+
+
+## 1.10.39
+
+* Fix building without TTS (#1691)
+* Add README for android libs. (#1693)
+* Fix: export-onnx.py (expected all tensors to be on the same device) (#1699)
+* Fix passing strings from C# to C. (#1701)
+
+## 1.10.38
+
+* Fix initializing TTS in Python. (#1664)
+* Remove spaces after punctuations for TTS (#1666)
+* Add constructor fromPtr() for all flutter class with factory ctor. (#1667)
+* Add Kotlin API for Matcha-TTS models. (#1668)
+* Support Matcha-TTS models using espeak-ng (#1672)
+* Add Java API for Matcha-TTS models. (#1673)
+* Avoid adding tail padding for VAD in generate-subtitles.py (#1674)
+* Add C API for MatchaTTS models (#1675)
+* Add CXX API for MatchaTTS models (#1676)
+* Add JavaScript API (node-addon-api) for MatchaTTS models. (#1677)
+* Add HarmonyOS examples for MatchaTTS. (#1678)
+* Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
+* Update workflows to use .NET 8.0 also. (#1681)
+* Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)
+* Add Android demo for MatchaTTS models. (#1683)
+* Add Swift API for MatchaTTS models. (#1684)
+* Add Go API for MatchaTTS models (#1685)
+* Add Pascal API for MatchaTTS models. (#1686)
+* Add Dart API for MatchaTTS models (#1687)
+
+## 1.10.37
+
+* Add new tts models for Latvia and Persian+English (#1644)
+* Add a byte-level BPE Chinese+English non-streaming zipformer model (#1645)
+* Support removing invalid utf-8 sequences. (#1648)
+* Add TeleSpeech CTC to non_streaming_server.py (#1649)
+* Fix building macOS libs (#1656)
+* Add Go API for Keyword spotting (#1662)
+* Add Swift online punctuation (#1661)
+* Add C++ runtime for Matcha-TTS (#1627)
+
+## 1.10.36
+
+* Update AAR version in Android Java demo (#1618)
+* Support linking onnxruntime statically for Android (#1619)
+* Update readme to include Open-LLM-VTuber (#1622)
+* Rename maxNumStences to maxNumSentences (#1625)
+* Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX (Linux arm64 GPU). (#1630)
+* Update readme to include jetson orin nx and nano b01 (#1631)
+* feat: add checksum action (#1632)
+* Support decoding with byte-level BPE (bbpe) models. (#1633)
+* feat: enable c api for android ci (#1635)
+* Update README.md (#1640)
+* SherpaOnnxVadAsr: Offload runSecondPass to background thread for improved real-time audio processing (#1638)
+* Fix GitHub actions. (#1642)
+
+
+## 1.10.35
+
+* Add missing changes about speaker identification demo for HarmonyOS (#1612)
+* Provide sherpa-onnx.aar for Android (#1615)
+* Use aar in Android Java demo. (#1616)
+
+## 1.10.34
+
+* Fix building node-addon package (#1598)
+* Update doc links for HarmonyOS (#1601)
+* Add on-device real-time ASR demo for HarmonyOS (#1606)
+* Add speaker identification APIs for HarmonyOS (#1607)
+* Add speaker identification demo for HarmonyOS (#1608)
+* Add speaker diarization API for HarmonyOS. (#1609)
+* Add speaker diarization demo for HarmonyOS (#1610)
+
+## 1.10.33
+
+* Add non-streaming ASR support for HarmonyOS. (#1564)
+* Add streaming ASR support for HarmonyOS. (#1565)
+* Fix building for Android (#1568)
+* Publish `sherpa_onnx.har` for HarmonyOS (#1572)
+* Add VAD+ASR demo for HarmonyOS (#1573)
+* Fix publishing har packages for HarmonyOS (#1576)
+* Add CI to build HAPs for HarmonyOS (#1578)
+* Add microphone demo about VAD+ASR for HarmonyOS (#1581)
+* Fix getting microphone permission for HarmonyOS VAD+ASR example (#1582)
+* Add HarmonyOS support for text-to-speech. (#1584)
+* Fix: support both old and new websockets request headers format (#1588)
+* Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)
+
+## 1.10.32
+
+* Support cross-compiling for HarmonyOS (#1553)
+* HarmonyOS support for VAD. (#1561)
+* Fix publishing flutter iOS app to appstore (#1563).
+
+## 1.10.31
+
+* Publish pre-built wheels for Python 3.13 (#1485)
+* Publish pre-built macos xcframework (#1490)
+* Fix reading tokens.txt on Windows. (#1497)
+* Add two-pass ASR Android APKs for Moonshine models. (#1499)
+* Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500)
+* Publish pre-built wheels with CUDA support for Linux aarch64. (#1507)
+* Export the English TTS model from MeloTTS (#1509)
+* Add Lazarus example for Moonshine models. (#1532)
+* Add isolate_tts demo (#1529)
+* Add WebAssembly example for VAD + Moonshine models. (#1535)
+* Add Android APK for streaming Paraformer ASR (#1538)
+* Support static build for windows arm64. (#1539)
+* Use xcframework for Flutter iOS plugin to support iOS simulators.
+
+## 1.10.30
+
+* Fix building node-addon for Windows x86. (#1469)
+* Begin to support https://github.com/usefulsensors/moonshine (#1470)
+* Publish pre-built JNI libs for Linux aarch64 (#1472)
+* Add C++ runtime and Python APIs for Moonshine models (#1473)
+* Add Kotlin and Java API for Moonshine models (#1474)
+* Add C and C++ API for Moonshine models (#1476)
+* Add Swift API for Moonshine models. (#1477)
+* Add Go API examples for adding punctuations to text. (#1478)
+* Add Go API for Moonshine models (#1479)
+* Add JavaScript API for Moonshine models (#1480)
+* Add Dart API for Moonshine models. (#1481)
+* Add Pascal API for Moonshine models (#1482)
+* Add C# API for Moonshine models. (#1483)
+
+## 1.10.29
+
+* Add Go API for offline punctuation models (#1434)
+* Support https://huggingface.co/Revai/reverb-diarization-v1 (#1437)
+* Add more models for speaker diarization (#1440)
+* Add Java API example for hotwords. (#1442)
+* Add java android demo (#1454)
+* Add C++ API for streaming ASR. (#1455)
+* Add C++ API for non-streaming ASR (#1456)
+* Handle NaN embeddings in speaker diarization. (#1461)
+* Add speaker identification with VAD and non-streaming ASR using ALSA (#1463)
+* Support GigaAM CTC models for Russian ASR (#1464)
+* Add GigaAM NeMo transducer model for Russian ASR (#1467)
+
+## 1.10.28
+
+* Fix swift example for generating subtitles. (#1362)
+* Allow more online models to load tokens file from the memory (#1352)
+* Fix CI errors introduced by supporting loading keywords from buffers (#1366)
+* Fix running MeloTTS models on GPU. (#1379)
+* Support Parakeet models from NeMo (#1381)
+* Export Pyannote speaker segmentation models to onnx (#1382)
+* Support Agglomerative clustering. (#1384)
+* Add Python API for clustering (#1385)
+* support whisper turbo (#1390)
+* context_state is not set correctly when previous context is passed after reset (#1393)
+* Speaker diarization example with onnxruntime Python API (#1395)
+* C++ API for speaker diarization (#1396)
+* Python API for speaker diarization. (#1400)
+* C API for speaker diarization (#1402)
+* docs(nodejs-addon-examples): add guide for pnpm user (#1401)
+* Go API for speaker diarization (#1403)
+* Swift API for speaker diarization (#1404)
+* Update readme to include more external projects using sherpa-onnx (#1405)
+* C# API for speaker diarization (#1407)
+* JavaScript API (node-addon) for speaker diarization (#1408)
+* WebAssembly example for speaker diarization (#1411)
+* Handle audio files less than 10s long for speaker diarization. (#1412)
+* JavaScript API with WebAssembly for speaker diarization (#1414)
+* Kotlin API for speaker diarization (#1415)
+* Java API for speaker diarization (#1416)
+* Dart API for speaker diarization (#1418)
+* Pascal API for speaker diarization (#1420)
+* Android JNI support for speaker diarization (#1421)
+* Android demo for speaker diarization (#1423)
+
## 1.10.27
* Add non-streaming ONNX models for Russian ASR (#1358)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9084a0216a..ef6d45b18f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,10 +8,9 @@ set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
project(sherpa-onnx)
# Remember to update
-# ./nodejs-addon-examples
-# ./dart-api-examples/
# ./CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.27")
+# ./new-release.sh
+set(SHERPA_ONNX_VERSION "1.10.42")
# Disable warning about
#
@@ -32,6 +31,7 @@ option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" O
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_ENABLE_DIRECTML "Enable ONNX Runtime DirectML support" OFF)
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
+option(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION "Whether to enable WASM for speaker diarization" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
@@ -46,13 +46,18 @@ option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-i
option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF)
option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ON)
+set(SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION "1.11.0" CACHE STRING "Used only for Linux ARM64 GPU. If you use Jetson nano b01, then please set it to 1.11.0. If you use Jetson Orin NX, then set it to 1.16.0")
+
+
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
-set(CMAKE_SKIP_BUILD_RPATH FALSE)
-set(BUILD_RPATH_USE_ORIGIN TRUE)
-set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+if(NOT WIN32)
+ set(CMAKE_SKIP_BUILD_RPATH FALSE)
+ set(BUILD_RPATH_USE_ORIGIN TRUE)
+ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+endif()
if(NOT APPLE)
set(SHERPA_ONNX_RPATH_ORIGIN "$ORIGIN")
@@ -80,11 +85,6 @@ if(SHERPA_ONNX_ENABLE_PYTHON AND NOT BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
endif()
-if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS)
- message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_JNI is ON")
- set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
-endif()
-
if(SHERPA_ONNX_ENABLE_GPU)
message(WARNING "\
Compiling for NVIDIA GPU is enabled. Please make sure cudatoolkit
@@ -123,6 +123,11 @@ if(MSVC)
)
endif()
+if(CMAKE_SYSTEM_NAME STREQUAL OHOS)
+ set(CMAKE_CXX_FLAGS "-Wno-unused-command-line-argument ${CMAKE_CXX_FLAGS}")
+ set(CMAKE_C_FLAGS "-Wno-unused-command-line-argument ${CMAKE_C_FLAGS}")
+endif()
+
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
@@ -135,6 +140,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
+message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
@@ -149,7 +155,7 @@ message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_
message(STATUS "SHERPA_ONNX_ENABLE_SANITIZER: ${SHERPA_ONNX_ENABLE_SANITIZER}")
message(STATUS "SHERPA_ONNX_BUILD_C_API_EXAMPLES: ${SHERPA_ONNX_BUILD_C_API_EXAMPLES}")
-if(BUILD_SHARED_LIBS)
+if(BUILD_SHARED_LIBS OR SHERPA_ONNX_ENABLE_JNI)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -196,9 +202,19 @@ else()
add_definitions(-DSHERPA_ONNX_ENABLE_DIRECTML=0)
endif()
+if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION)
+ if(NOT SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION to ON if you want to build WASM for speaker diarization")
+ endif()
+
+ if(NOT SHERPA_ONNX_ENABLE_WASM)
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for speaker diarization")
+ endif()
+endif()
+
if(SHERPA_ONNX_ENABLE_WASM_TTS)
if(NOT SHERPA_ONNX_ENABLE_TTS)
- message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build wasm TTS")
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build WASM for TTS")
endif()
if(NOT SHERPA_ONNX_ENABLE_WASM)
@@ -250,7 +266,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
include(CheckIncludeFileCXX)
-if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android AND NOT CMAKE_SYSTEM_NAME STREQUAL OHOS)
check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
if(SHERPA_ONNX_HAS_ALSA)
message(STATUS "With Alsa")
@@ -387,6 +403,7 @@ add_subdirectory(sherpa-onnx)
if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY AND SHERPA_ONNX_BUILD_C_API_EXAMPLES)
set(SHERPA_ONNX_PKG_WITH_CARGS "-lcargs")
add_subdirectory(c-api-examples)
+ add_subdirectory(cxx-api-examples)
endif()
if(SHERPA_ONNX_ENABLE_WASM)
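For reference, a minimal configure sketch that exercises the new WASM speaker diarization options added above. Only the `-D` flags come from this CMakeLists.txt change; the build directory name and the use of Emscripten's `emcmake` wrapper are assumptions.

```bash
# Sketch only: the options are taken from the CMakeLists.txt change above;
# emcmake and the build directory name are assumptions, not part of this patch.
emcmake cmake -B build-wasm-speaker-diarization \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON \
  -DSHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION=ON \
  .
cmake --build build-wasm-speaker-diarization
```

The new `SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION` cache variable is set the same way, e.g. `-DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0` together with `-DSHERPA_ONNX_ENABLE_GPU=ON` on a Jetson Nano B01, per its help string.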
diff --git a/README.md b/README.md
index 890abe8827..b5fc25115e 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,12 @@
### Supported functions
-|Speech recognition| Speech synthesis | Speaker verification | Speaker identification |
-|------------------|------------------|----------------------|------------------------|
-| ✔️ | ✔️ | ✔️ | ✔️ |
+|Speech recognition| Speech synthesis |
+|------------------|------------------|
+| ✔️ | ✔️ |
+
+|Speaker identification| Speaker diarization | Speaker verification |
+|----------------------|---------------------|----------------------|
+| ✔️ | ✔️ | ✔️ |
| Spoken Language identification | Audio tagging | Voice activity detection |
|--------------------------------|---------------|--------------------------|
@@ -14,14 +18,13 @@
### Supported platforms
-|Architecture| Android | iOS | Windows | macOS | linux |
-|------------|---------|---------|------------|-------|-------|
-| x64 | ✔️ | | ✔️ | ✔️ | ✔️ |
-| x86 | ✔️ | | ✔️ | | |
-| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
-| arm32 | ✔️ | | | | ✔️ |
-| riscv64 | | | | | ✔️ |
-
+|Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS |
+|------------|---------|---------|------------|-------|-------|-----------|
+| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | ✔️ |
+| x86 | ✔️ | | ✔️ | | | |
+| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
+| arm32 | ✔️ | | | | ✔️ | ✔️ |
+| riscv64 | | | | | ✔️ | |
### Supported programming languages
@@ -47,6 +50,7 @@ This repository supports running the following functions **locally**
- Speech-to-text (i.e., ASR); both streaming and non-streaming are supported
- Text-to-speech (i.e., TTS)
+ - Speaker diarization
- Speaker identification
- Speaker verification
- Spoken language identification
@@ -60,8 +64,11 @@ on the following platforms and operating systems:
- Linux, macOS, Windows, openKylin
- Android, WearOS
- iOS
+ - HarmonyOS
- NodeJS
- WebAssembly
+  - [NVIDIA Jetson Orin NX][NVIDIA Jetson Orin NX] (supports both CPU and GPU)
+  - [NVIDIA Jetson Nano B01][NVIDIA Jetson Nano B01] (supports both CPU and GPU)
- [Raspberry Pi][Raspberry Pi]
- [RV1126][RV1126]
- [LicheePi4A][LicheePi4A]
@@ -79,17 +86,19 @@ with the following APIs
### Links for Huggingface Spaces
-You can visit the following Huggingface spaces to try `sherpa-onnx` without
-installing anything. All you need is a browser.
-
-| Description | URL |
-|-------------------------------------------------------|------------------------------------|
-| Speech recognition | [Click me][hf-space-asr] |
-| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
-| Speech synthesis | [Click me][hf-space-tts] |
-| Generate subtitles | [Click me][hf-space-subtitle] |
-| Audio tagging | [Click me][hf-space-audio-tagging] |
-| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
+
+You can visit the following Huggingface spaces to try sherpa-onnx without
+installing anything. All you need is a browser.
+
+| Description | URL |
+|-------------------------------------------------------|-----------------------------------------|
+| Speaker diarization | [Click me][hf-space-speaker-diarization]|
+| Speech recognition | [Click me][hf-space-asr] |
+| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
+| Speech synthesis | [Click me][hf-space-tts] |
+| Generate subtitles | [Click me][hf-space-subtitle] |
+| Audio tagging | [Click me][hf-space-audio-tagging] |
+| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
We also have spaces built using WebAssembly. They are listed below:
@@ -102,6 +111,7 @@ We also have spaces built using WebAssembly. They are listed below:
|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]|
|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
+|VAD + speech recognition (English) with [Moonshine tiny][Moonshine tiny]|[Click me][wasm-hf-vad-asr-en-moonshine-tiny-en]| [地址][wasm-ms-vad-asr-en-moonshine-tiny-en]|
|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech] |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech] |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
@@ -111,24 +121,36 @@ We also have spaces built using WebAssembly. They are listed below:
|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
|Speech synthesis (English) |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
|Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|
+|Speaker diarization |[Click me][wasm-hf-speaker-diarization]|[地址][wasm-ms-speaker-diarization]|
+
+
### Links for pre-built Android APKs
-| Description | URL | 中国用户 |
-|----------------------------------------|------------------------------|-----------------------------|
-| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]|
-| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
-| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
-| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
-| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] |
-| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] |
-| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] |
-| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] |
-| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] |
-| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] |
+
+
+You can find pre-built Android APKs for this repository in the following table.
+
+| Description | URL | 中国用户 |
+|----------------------------------------|------------------------------------|-----------------------------------|
+| Speaker diarization | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]|
+| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn] |
+| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
+| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
+| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
+| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] |
+| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] |
+| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] |
+| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] |
+| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] |
+| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] |
+
+
### Links for pre-built Flutter APPs
+
+
#### Real-time speech recognition
| Description | URL | 中国用户 |
@@ -147,17 +169,24 @@ We also have spaces built using WebAssembly. They are listed below:
> Note: You need to build from source for iOS.
+
+
### Links for pre-built Lazarus APPs
+
+
#### Generating subtitles
| Description | URL | 中国用户 |
|--------------------------------|----------------------------|----------------------------|
| Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|
+
### Links for pre-trained models
+
+
| Description | URL |
|---------------------------------------------|---------------------------------------------------------------------------------------|
| Speech recognition (speech to text, ASR) | [Address][asr-models] |
@@ -168,6 +197,64 @@ We also have spaces built using WebAssembly. They are listed below:
| Speaker identification (Speaker ID) | [Address][sid-models] |
| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from [Speech recognition][asr-models]|
| Punctuation | [Address][punct-models] |
+| Speaker segmentation | [Address][speaker-segmentation-models] |
+
+
+
+#### Some pre-trained ASR models (Streaming)
+
+
+
+Please see
+
+ -
+ -
+ -
+
+for more models. The following table lists only **SOME** of them.
+
+
+|Name | Supported Languages| Description|
+|-----|-----|----|
+|[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20][sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english)|
+|[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16][sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)|
+|[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23][sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]|Chinese| Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23)|
+|[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17][sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]|English|Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-en-20m-2023-02-17)|
+|[sherpa-onnx-streaming-zipformer-korean-2024-06-16][sherpa-onnx-streaming-zipformer-korean-2024-06-16]|Korean| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)|
+|[sherpa-onnx-streaming-zipformer-fr-2023-04-14][sherpa-onnx-streaming-zipformer-fr-2023-04-14]|French| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)|
+
+
+
+
+#### Some pre-trained ASR models (Non-Streaming)
+
+
+
+Please see
+
+ -
+ -
+ -
+ -
+ -
+
+for more models. The following table lists only **SOME** of them.
+
+|Name | Supported Languages| Description|
+|-----|-----|----|
+|[Whisper tiny.en](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2)|English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html)|
+|[Moonshine tiny][Moonshine tiny]|English|See [also](https://github.com/usefulsensors/moonshine)|
+|[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17][sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]|Chinese, Cantonese, English, Korean, Japanese| 支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html)|
+|[sherpa-onnx-paraformer-zh-2024-03-09][sherpa-onnx-paraformer-zh-2024-03-09]|Chinese, English| 也支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2024-03-09-chinese-english)|
+|[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01][sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]|Japanese|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-japanese)|
+|[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24][sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24-russian)|
+|[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24][sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]|Russian| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/russian.html#sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24)|
+|[sherpa-onnx-zipformer-ru-2024-09-18][sherpa-onnx-zipformer-ru-2024-09-18]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ru-2024-09-18-russian)|
+|[sherpa-onnx-zipformer-korean-2024-06-24][sherpa-onnx-zipformer-korean-2024-06-24]|Korean|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-korean-2024-06-24-korean)|
+|[sherpa-onnx-zipformer-thai-2024-06-20][sherpa-onnx-zipformer-thai-2024-06-20]|Thai| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-thai-2024-06-20-thai)|
+|[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04][sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]|Chinese| 支持多种方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html#sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04)|
+
+
### Useful links
@@ -182,6 +269,13 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
## Projects using sherpa-onnx
+### [Open-LLM-VTuber](https://github.com/t41372/Open-LLM-VTuber)
+
+Talk to any LLM with hands-free voice interaction, voice interruption, and a Live2D talking
+face, running locally across platforms.
+
+See also
+
### [voiceapi](https://github.com/ruzhila/voiceapi)
@@ -191,10 +285,30 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
It shows how to use the ASR and TTS Python APIs with FastAPI.
-### [TMSpeech](https://github.com/jxlpzqc/TMSpeech)
+### [腾讯会议摸鱼工具 TMSpeech](https://github.com/jxlpzqc/TMSpeech)
Uses streaming ASR in C# with graphical user interface.
+Video demo in Chinese: [【开源】Windows实时字幕软件(网课/开会必备)](https://www.bilibili.com/video/BV1rX4y1p7Nx)
+
+### [lol互动助手](https://github.com/l1veIn/lol-wom-electron)
+
+It uses the JavaScript API of sherpa-onnx along with [Electron](https://electronjs.org/).
+
+Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率的英雄联盟工具!英雄联盟的最后一块拼图!和游戏中的每个人无障碍沟通!](https://www.bilibili.com/video/BV142tje9E74)
+
+### [Sherpa-ONNX 语音识别服务器](https://github.com/hfyydd/sherpa-onnx-server)
+
+A Node.js-based server providing a RESTful API for speech recognition.
+
+### [QSmartAssistant](https://github.com/xinhecuican/QSmartAssistant)
+
+A modular dialogue robot / smart speaker that runs fully offline with low resource usage (一个模块化,全过程可离线,低占用率的对话机器人/智能音箱)
+
+It uses Qt. Both [ASR](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#asr)
+and [TTS](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#tts)
+are used.
+
[sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs
[silero-vad]: https://github.com/snakers4/silero-vad
@@ -204,6 +318,7 @@ Uses streaming ASR in C# with graphical user interface.
[VisionFive 2]: https://www.starfivetech.com/en/site/boards
[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
+[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization
[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
[Whisper]: https://github.com/openai/whisper
[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
@@ -227,6 +342,8 @@ Uses streaming ASR in C# with graphical user interface.
[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
+[wasm-hf-vad-asr-en-moonshine-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
+[wasm-ms-vad-asr-en-moonshine-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
@@ -248,6 +365,10 @@ Uses streaming ASR in C# with graphical user interface.
[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
+[wasm-hf-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx
+[wasm-ms-speaker-diarization]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx
+[apk-speaker-diarization]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk.html
+[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html
[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
@@ -290,5 +411,24 @@ Uses streaming ASR in C# with graphical user interface.
[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
+[speaker-segmentation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
+[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2
+[sherpa-onnx-streaming-zipformer-korean-2024-06-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-korean-2024-06-16.tar.bz2
+[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
+[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
+[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
+[sherpa-onnx-zipformer-ru-2024-09-18]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ru-2024-09-18.tar.bz2
+[sherpa-onnx-zipformer-korean-2024-06-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-korean-2024-06-24.tar.bz2
+[sherpa-onnx-zipformer-thai-2024-06-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-thai-2024-06-20.tar.bz2
+[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24.tar.bz2
+[sherpa-onnx-paraformer-zh-2024-03-09]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2
+[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
+[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+[sherpa-onnx-streaming-zipformer-fr-2023-04-14]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-fr-2023-04-14.tar.bz2
+[Moonshine tiny]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+[NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9
+[NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/
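As a usage sketch for the first entry in the streaming ASR model table above, assuming the project has already been built into `./build`: the download URL and the file names inside the archive also appear later in this patch (see the SherpaOnnxJavaDemo README), while the `test_wavs` path is an assumption.

```bash
# Sketch only: flag names follow the sherpa-onnx streaming CLI; the test wave
# path is an assumption. Run ./build/bin/sherpa-onnx --help to confirm.
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

./build/bin/sherpa-onnx \
  --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
```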
diff --git a/android/README.md b/android/README.md
index 42b29e08f3..bae3355987 100644
--- a/android/README.md
+++ b/android/README.md
@@ -4,6 +4,8 @@ Please refer to
https://k2-fsa.github.io/sherpa/onnx/android/index.html
for usage.
+- [SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It is for speaker diarization.
+
- [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model.
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model
diff --git a/android/SherpaOnnxAar/.gitignore b/android/SherpaOnnxAar/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxAar/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxAar/README.md b/android/SherpaOnnxAar/README.md
new file mode 100644
index 0000000000..3238153c2a
--- /dev/null
+++ b/android/SherpaOnnxAar/README.md
@@ -0,0 +1,20 @@
+# Usage of this project
+
+```
+git clone https://github.com/k2-fsa/sherpa-onnx
+cd sherpa-onnx
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.10.42/sherpa-onnx-v1.10.42-android.tar.bz2
+tar xvf sherpa-onnx-v1.10.42-android.tar.bz2
+
+cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/
+cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/
+cp -v jniLibs/x86/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/
+cp -v jniLibs/x86_64/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/
+
+cd android/SherpaOnnxAar
+
+./gradlew :sherpa_onnx:assembleRelease
+ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.10.42.aar
+```
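One hypothetical way to consume the AAR built above from an app module is as a local file dependency; the app path below is made up, and the SherpaOnnxJavaDemo `build.gradle` later in this diff shows the JitPack alternative (`com.github.k2-fsa:sherpa-onnx:v1.10.42`).

```bash
# Hypothetical consumer: copy the AAR into an app module's libs/ directory and
# reference it from Gradle with implementation files('libs/sherpa-onnx-1.10.42.aar').
mkdir -p /path/to/YourApp/app/libs
cp ../../sherpa-onnx-1.10.42.aar /path/to/YourApp/app/libs/
```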
diff --git a/android/SherpaOnnxAar/build.gradle.kts b/android/SherpaOnnxAar/build.gradle.kts
new file mode 100644
index 0000000000..e3f8a07411
--- /dev/null
+++ b/android/SherpaOnnxAar/build.gradle.kts
@@ -0,0 +1,6 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ alias(libs.plugins.android.application) apply false
+ alias(libs.plugins.jetbrains.kotlin.android) apply false
+ alias(libs.plugins.android.library) apply false
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/gradle.properties b/android/SherpaOnnxAar/gradle.properties
new file mode 100644
index 0000000000..20e2a01520
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle.properties
@@ -0,0 +1,23 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/gradle/libs.versions.toml b/android/SherpaOnnxAar/gradle/libs.versions.toml
new file mode 100644
index 0000000000..56172d2933
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle/libs.versions.toml
@@ -0,0 +1,23 @@
+[versions]
+agp = "8.4.0"
+kotlin = "1.7.20"
+coreKtx = "1.15.0"
+junit = "4.13.2"
+junitVersion = "1.2.1"
+espressoCore = "3.6.1"
+appcompat = "1.7.0"
+material = "1.12.0"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" }
+material = { group = "com.google.android.material", name = "material", version.ref = "material" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+android-library = { id = "com.android.library", version.ref = "agp" }
+
diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..a8b3563581
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Thu Dec 12 14:02:30 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/android/SherpaOnnxAar/gradlew b/android/SherpaOnnxAar/gradlew
new file mode 100755
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxAar/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxAar/gradlew.bat b/android/SherpaOnnxAar/gradlew.bat
new file mode 100644
index 0000000000..ac1b06f938
--- /dev/null
+++ b/android/SherpaOnnxAar/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxAar/settings.gradle.kts b/android/SherpaOnnxAar/settings.gradle.kts
new file mode 100644
index 0000000000..53ee52b547
--- /dev/null
+++ b/android/SherpaOnnxAar/settings.gradle.kts
@@ -0,0 +1,23 @@
+pluginManagement {
+ repositories {
+ google {
+ content {
+ includeGroupByRegex("com\\.android.*")
+ includeGroupByRegex("com\\.google.*")
+ includeGroupByRegex("androidx.*")
+ }
+ }
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+}
+
+rootProject.name = "SherpaOnnxAar"
+include(":sherpa_onnx")
diff --git a/android/SherpaOnnxAar/sherpa_onnx/.gitignore b/android/SherpaOnnxAar/sherpa_onnx/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts
new file mode 100644
index 0000000000..4803cb8378
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts
@@ -0,0 +1,43 @@
+plugins {
+ alias(libs.plugins.android.library)
+ alias(libs.plugins.jetbrains.kotlin.android)
+}
+
+android {
+ namespace = "com.k2fsa.sherpa.onnx"
+ compileSdk = 34
+
+ defaultConfig {
+ minSdk = 21
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ consumerProguardFiles("consumer-rules.pro")
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+}
+
+dependencies {
+
+ implementation(libs.androidx.core.ktx)
+ implementation(libs.androidx.appcompat)
+ implementation(libs.material)
+ testImplementation(libs.junit)
+ androidTestImplementation(libs.androidx.junit)
+ androidTestImplementation(libs.androidx.espresso.core)
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
new file mode 100644
index 0000000000..db1fbefc31
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
@@ -0,0 +1,24 @@
+package com.k2fsa.sherpa.onnx
+
+import androidx.test.platform.app.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+
+import org.junit.Test
+import org.junit.runner.RunWith
+
+import org.junit.Assert.*
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+@RunWith(AndroidJUnit4::class)
+class ExampleInstrumentedTest {
+ @Test
+ fun useAppContext() {
+ // Context of the app under test.
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext
+ assertEquals("com.k2fsa.sherpa.onnx.test", appContext.packageName)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..a5918e68ab
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+</manifest>
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt
new file mode 120000
index 0000000000..25c36e3965
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
new file mode 120000
index 0000000000..952fae878a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt
new file mode 120000
index 0000000000..4392376a1f
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt
new file mode 120000
index 0000000000..1eed71678d
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflinePunctuation.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt
new file mode 120000
index 0000000000..faa3ab4acf
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt
new file mode 120000
index 0000000000..d850dd7fdc
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
new file mode 120000
index 0000000000..2a3aff864e
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
new file mode 120000
index 0000000000..5bb19ee10e
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
new file mode 120000
index 0000000000..d4518b89bf
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt
new file mode 120000
index 0000000000..66441dea73
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..754102447c
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt
new file mode 120000
index 0000000000..de79a7d20a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
new file mode 120000
index 0000000000..f1392e7712
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
new file mode 120000
index 0000000000..761b158ce9
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
new file mode 120000
index 0000000000..05c8fb2463
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
new file mode 100644
index 0000000000..05dfcd635f
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
@@ -0,0 +1,17 @@
+package com.k2fsa.sherpa.onnx
+
+import org.junit.Test
+
+import org.junit.Assert.*
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+class ExampleUnitTest {
+ @Test
+ fun addition_isCorrect() {
+ assertEquals(4, 2 + 2)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/.gitignore b/android/SherpaOnnxJavaDemo/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxJavaDemo/README.md b/android/SherpaOnnxJavaDemo/README.md
new file mode 100644
index 0000000000..8d7b84dbd5
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/README.md
@@ -0,0 +1,44 @@
+# Introduction
+
+Please run the following commands to download model files before you run this Android demo:
+
+```bash
+# Assume we are inside
+# /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo
+
+cd app/src/main/assets/
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./
+
+rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/*
+
+mv encoder-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv decoder-epoch-99-avg-1.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv joiner-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv tokens.txt sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+```
+
+You should have the following directory structure:
+```
+(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo/app/src/main/assets
+
+(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
+.
+└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+ ├── decoder-epoch-99-avg-1.onnx
+ ├── encoder-epoch-99-avg-1.int8.onnx
+ ├── joiner-epoch-99-avg-1.int8.onnx
+ └── tokens.txt
+
+1 directory, 4 files
+```
+
+Remember to remove unused files to reduce the file size of the final APK.
diff --git a/android/SherpaOnnxJavaDemo/app/.gitignore b/android/SherpaOnnxJavaDemo/app/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/build.gradle b/android/SherpaOnnxJavaDemo/app/build.gradle
new file mode 100644
index 0000000000..d8ccc7a608
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/build.gradle
@@ -0,0 +1,38 @@
+plugins {
+ id 'com.android.application'
+}
+
+android {
+ compileSdk 34
+
+ defaultConfig {
+ applicationId "com.k2fsa.sherpa.onnx"
+ minSdk 28
+ targetSdk 34
+ versionCode 1
+ versionName "1.0"
+
+ testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+ }
+
+ buildTypes {
+ release {
+ minifyEnabled false
+ proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+ }
+ }
+ compileOptions {
+ sourceCompatibility JavaVersion.VERSION_1_8
+ targetCompatibility JavaVersion.VERSION_1_8
+ }
+}
+
+dependencies {
+ implementation 'androidx.appcompat:appcompat:1.3.1'
+ implementation 'com.google.android.material:material:1.3.0'
+ implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
+ implementation 'pub.devrel:easypermissions:3.0.0'
+ implementation 'androidx.core:core-ktx:1.7.0'
+ // implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
+ implementation 'com.github.k2-fsa:sherpa-onnx:v1.10.42'
+}
diff --git a/android/SherpaOnnxJavaDemo/app/proguard-rules.pro b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..947820249a
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep b/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java
new file mode 100644
index 0000000000..bd5f8a86f1
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java
@@ -0,0 +1,18 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.lifecycle.LiveData;
+import androidx.lifecycle.MutableLiveData;
+import androidx.lifecycle.ViewModel;
+
+public class AppViewModel extends ViewModel {
+ private final MutableLiveData<String> speechRecognitionResult = new MutableLiveData<>();
+
+ public LiveData<String> getSpeechRecognitionResult() {
+ return speechRecognitionResult;
+ }
+
+ public void setSpeechRecognitionResult(String result) {
+ speechRecognitionResult.postValue(result);
+ }
+
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java
new file mode 100644
index 0000000000..f8acf35cb9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java
@@ -0,0 +1,39 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.annotation.NonNull;
+import androidx.lifecycle.ViewModelProvider;
+import androidx.lifecycle.ViewModelStore;
+import androidx.lifecycle.ViewModelStoreOwner;
+
+
+public class Application extends android.app.Application implements ViewModelStoreOwner {
+ public static Application sApplication;
+
+
+ private AppViewModel viewModel;
+ private ViewModelStore viewModelStore;
+
+ public static Application getInstance() {
+ return sApplication;
+ }
+
+ @Override
+ public void onCreate() {
+ super.onCreate();
+ sApplication = this;
+ viewModelStore = new ViewModelStore();
+ viewModel = new ViewModelProvider(this).get(AppViewModel.class);
+ }
+
+ @NonNull
+ @Override
+ public ViewModelStore getViewModelStore() {
+ return viewModelStore;
+ }
+
+ public AppViewModel getViewModel() {
+ return viewModel;
+ }
+
+
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java
new file mode 100644
index 0000000000..c465f6c30b
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java
@@ -0,0 +1,52 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.appcompat.app.AppCompatActivity;
+import androidx.core.content.ContextCompat;
+import androidx.lifecycle.ViewModelProvider;
+
+import android.Manifest;
+import android.content.Intent;
+import android.os.Bundle;
+import android.util.Log;
+import android.widget.TextView;
+
+import com.k2fsa.sherpa.onnx.service.SpeechSherpaRecognitionService;
+
+import pub.devrel.easypermissions.EasyPermissions;
+
+public class MainActivity extends AppCompatActivity {
+ private AppViewModel appViewModel;
+ private TextView tvText;
+ private static final int RC_AUDIO_PERM = 123;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_main);
+ tvText = findViewById(R.id.text);
+ requestMicrophonePermission();
+ }
+
+
+ private void startSpeechService() {
+ Intent serviceIntent = new Intent(this, SpeechSherpaRecognitionService.class);
+ ContextCompat.startForegroundService(this, serviceIntent);
+ appViewModel = new ViewModelProvider(Application.getInstance()).get(AppViewModel.class);
+ appViewModel.getSpeechRecognitionResult().observe(this, this::handleSpeechRecognitionResult);
+ }
+
+ private void handleSpeechRecognitionResult(String result) {
+ tvText.setText(result);
+ }
+
+ private void requestMicrophonePermission() {
+ String[] perms = {Manifest.permission.RECORD_AUDIO};
+ if (EasyPermissions.hasPermissions(this, perms)) {
+ startSpeechService();
+ } else {
+ EasyPermissions.requestPermissions(MainActivity.this,
+ "We need access to your microphone for voice recognition",
+ RC_AUDIO_PERM, perms);
+ }
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java
new file mode 100644
index 0000000000..02ad4a15d2
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java
@@ -0,0 +1,261 @@
+package com.k2fsa.sherpa.onnx.service;
+
+import android.Manifest;
+import android.annotation.SuppressLint;
+import android.app.Notification;
+import android.app.NotificationChannel;
+import android.app.NotificationManager;
+import android.app.Service;
+import android.content.Intent;
+import android.content.pm.PackageManager;
+import android.content.res.AssetManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder;
+import android.os.Build;
+import android.os.IBinder;
+import android.text.TextUtils;
+import android.util.Log;
+
+import androidx.core.app.ActivityCompat;
+import androidx.core.app.NotificationCompat;
+
+
+import com.k2fsa.sherpa.onnx.AppViewModel;
+import com.k2fsa.sherpa.onnx.Application;
+
+import com.k2fsa.sherpa.onnx.OnlineModelConfig;
+import com.k2fsa.sherpa.onnx.OnlineRecognizer;
+
+import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig;
+import com.k2fsa.sherpa.onnx.OnlineStream;
+import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig;
+import com.k2fsa.sherpa.onnx.R;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import java.util.Objects;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+
+public class SpeechSherpaRecognitionService extends Service {
+
+ private AppViewModel appViewModel;
+ private OnlineRecognizer recognizer;
+ private final int sampleRateInHz = 16000;
+
+ private Thread recordingThread;
+ private boolean isRecording = false;
+ private int audioSource = MediaRecorder.AudioSource.MIC;
+ private int channelConfig = AudioFormat.CHANNEL_IN_MONO;
+ private int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
+ private AudioRecord audioRecord;
+ private int idx = 0;
+ private String lastText = "";
+ private ExecutorService executor;
+
+ @Override
+ public void onCreate() {
+ super.onCreate();
+ startForegroundService();
+ // Get the shared ViewModel from the Application
+ appViewModel = Application.getInstance().getViewModel();
+ int numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
+
+ if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
+ // TODO: Consider calling
+ // ActivityCompat#requestPermissions
+ // here to request the missing permissions, and then overriding
+ // public void onRequestPermissionsResult(int requestCode, String[] permissions,
+ // int[] grantResults)
+ // to handle the case where the user grants the permission. See the documentation
+ // for ActivityCompat#requestPermissions for more details.
+ return;
+ }
+ audioRecord = new AudioRecord(
+ audioSource,
+ sampleRateInHz,
+ channelConfig,
+ audioFormat,
+ numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
+ );
+ executor = Executors.newSingleThreadExecutor();
+ executor.execute(this::initializeSherpa);
+ }
+
+
+ private void initializeSherpa() {
+ Log.d("Current Directory", System.getProperty("user.dir"));
+ String modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
+ initializeSherpaDir(modelDir, modelDir);
+ OnlineTransducerModelConfig onlineTransducerModelConfig = new OnlineTransducerModelConfig();
+ onlineTransducerModelConfig.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx");
+ onlineTransducerModelConfig.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx");
+ onlineTransducerModelConfig.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx");
+
+ OnlineModelConfig onlineModelConfig = new OnlineModelConfig();
+ onlineModelConfig.setTransducer(onlineTransducerModelConfig);
+ onlineModelConfig.setTokens(modelDir + "/tokens.txt");
+ onlineModelConfig.setModelType("zipformer");
+ onlineModelConfig.setDebug(true);
+
+ OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ config.setModelConfig(onlineModelConfig);
+ recognizer = new OnlineRecognizer(getAssets(), config);
+
+ audioRecord.startRecording();
+ startRecognition();
+ }
+
+ private void startRecognition() {
+ isRecording = true;
+ recordingThread = new Thread(this::processSamples);
+ recordingThread.start();
+ }
+
+ private void processSamples() {
+ OnlineStream stream = recognizer.createStream("");
+ double interval = 0.1;
+ int bufferSize = (int) (interval * sampleRateInHz);
+ short[] buffer = new short[bufferSize];
+
+ while (isRecording) {
+ int ret = audioRecord != null ? audioRecord.read(buffer, 0, buffer.length) : -1;
+ if (ret > 0) {
+ float[] samples = new float[ret];
+ for (int i = 0; i < ret; i++) {
+ samples[i] = buffer[i] / 32768.0f;
+ }
+ stream.acceptWaveform(samples, sampleRateInHz);
+ while (recognizer.isReady(stream)) {
+ recognizer.decode(stream);
+ }
+
+ boolean isEndpoint = recognizer.isEndpoint(stream);
+ String text = recognizer.getResult(stream).getText();
+ if (isEndpoint) {
+ float[] tailPaddings = new float[(int) (0.8 * sampleRateInHz)];
+ stream.acceptWaveform(tailPaddings, sampleRateInHz);
+ while (recognizer.isReady(stream)) {
+ recognizer.decode(stream);
+ }
+ text = recognizer.getResult(stream).getText();
+ }
+
+ String textToDisplay = lastText;
+
+ if (!TextUtils.isEmpty(text)) {
+ textToDisplay = TextUtils.isEmpty(lastText) ? idx + ": " + text : lastText + "\n" + idx + ": " + text;
+ }
+
+ if (isEndpoint) {
+ recognizer.reset(stream);
+ if (!TextUtils.isEmpty(text)) {
+ lastText = lastText + "\n" + idx + ": " + text;
+ textToDisplay = lastText;
+ idx += 1;
+ }
+ appViewModel.setSpeechRecognitionResult(textToDisplay);
+ }
+ }
+
+ }
+ stream.release();
+
+ }
+
+
+ @Override
+ public int onStartCommand(Intent intent, int flags, int startId) {
+
+ return START_STICKY;
+ }
+
+ @Override
+ public void onDestroy() {
+ super.onDestroy();
+ audioRecord.stop();
+ audioRecord.release();
+ executor.shutdown();
+ stopForeground(true);
+ }
+
+ @Override
+ public IBinder onBind(Intent intent) {
+ return null;
+ }
+
+
+ @SuppressLint("ForegroundServiceType")
+ private void startForegroundService() {
+ String channelId = createNotificationChannel();
+
+ Notification notification = new NotificationCompat.Builder(this, channelId)
+ .setContentTitle("Foreground Service")
+ .setContentText("Running in the foreground")
+ .setSmallIcon(R.drawable.ic_bg_mic_24)
+ .build();
+
+ startForeground(1, notification);
+ }
+
+ // Create the notification channel (required on Android 8.0 and above)
+ private String createNotificationChannel() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ String channelId = "speech_channel";
+ String channelName = "Speech Channel";
+ NotificationChannel channel = new NotificationChannel(channelId, channelName, NotificationManager.IMPORTANCE_LOW);
+ NotificationManager manager = getSystemService(NotificationManager.class);
+ if (manager != null) {
+ manager.createNotificationChannel(channel);
+ }
+ return channelId;
+ } else {
+ return "";
+ }
+ }
+
+ private void initializeSherpaDir(String assetDir, String internalDir) {
+ AssetManager assetManager = getAssets();
+ File outDir = new File(getFilesDir(), internalDir);
+
+ if (!outDir.exists()) {
+ outDir.mkdirs();
+ }
+
+ try {
+ String[] assets = assetManager.list(assetDir);
+ if (assets != null) {
+ for (String asset : assets) {
+ String assetPath = assetDir.isEmpty() ? asset : assetDir + "/" + asset;
+ File outFile = new File(outDir, asset);
+ if (Objects.requireNonNull(assetManager.list(assetPath)).length > 0) {
+ outFile.mkdirs();
+ initializeSherpaDir(assetPath, internalDir + "/" + asset); // recursively copy the subdirectory
+ } else {
+ InputStream in = assetManager.open(assetPath);
+ OutputStream out = new FileOutputStream(outFile);
+
+ byte[] buffer = new byte[1024];
+ int read;
+ while ((read = in.read(buffer)) != -1) {
+ out.write(buffer, 0, read);
+ }
+
+ in.close();
+ out.flush();
+ out.close();
+ }
+ }
+ }
+ } catch (IOException e) {
+ Log.e("ModelCopy", "Failed to copy assets", e);
+ }
+ }
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new file mode 100644
index 0000000000..2b068d1146
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml
new file mode 100644
index 0000000000..5eb92eb316
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml
@@ -0,0 +1,5 @@
+
+
+
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000000..07d5da9cbf
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml
new file mode 100644
index 0000000000..ae3ea627e9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 0000000000..eca70cfe52
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 0000000000..eca70cfe52
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp
new file mode 100644
index 0000000000..c209e78ecd
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..b2dfe3d1ba
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp
new file mode 100644
index 0000000000..4f0f1d64e5
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..62b611da08
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..948a3070fe
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..1b9a6956b3
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..28d4b77f9f
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9287f50836
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..aa7d6427e6
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9126ae37cb
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml
new file mode 100644
index 0000000000..20276125c9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml
@@ -0,0 +1,16 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml
new file mode 100644
index 0000000000..f8c6127d32
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml
@@ -0,0 +1,10 @@
+
+
+ #FFBB86FC
+ #FF6200EE
+ #FF3700B3
+ #FF03DAC5
+ #FF018786
+ #FF000000
+ #FFFFFFFF
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml
new file mode 100644
index 0000000000..31aa7267dd
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml
@@ -0,0 +1,3 @@
+<resources>
+    <string name="app_name">SherpaOnnxJavaDemo</string>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml
new file mode 100644
index 0000000000..d9f132e856
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml
@@ -0,0 +1,16 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml
new file mode 100644
index 0000000000..fa0f996d2c
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml
@@ -0,0 +1,13 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml
new file mode 100644
index 0000000000..9ee9997b0b
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/build.gradle b/android/SherpaOnnxJavaDemo/build.gradle
new file mode 100644
index 0000000000..5ae9a7b016
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/build.gradle
@@ -0,0 +1,9 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ id 'com.android.application' version '7.2.2' apply false
+ id 'com.android.library' version '7.2.2' apply false
+}
+
+task clean(type: Delete) {
+ delete rootProject.buildDir
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/gradle.properties b/android/SherpaOnnxJavaDemo/gradle.properties
new file mode 100644
index 0000000000..dab7c28bff
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradle.properties
@@ -0,0 +1,21 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. More details, visit
+# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..489dbeed18
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Tue Oct 22 10:59:18 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
+distributionPath=wrapper/dists
+zipStorePath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
diff --git a/android/SherpaOnnxJavaDemo/gradlew b/android/SherpaOnnxJavaDemo/gradlew
new file mode 100644
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxJavaDemo/gradlew.bat b/android/SherpaOnnxJavaDemo/gradlew.bat
new file mode 100644
index 0000000000..107acd32c4
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxJavaDemo/settings.gradle b/android/SherpaOnnxJavaDemo/settings.gradle
new file mode 100644
index 0000000000..e552eb6899
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/settings.gradle
@@ -0,0 +1,17 @@
+pluginManagement {
+ repositories {
+ gradlePluginPortal()
+ google()
+ mavenCentral()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ maven { url 'https://jitpack.io' }
+ }
+}
+rootProject.name = "SherpaOnnxJavaDemo"
+include ':app'
diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index b17a6ea6c7..b42937ad37 100644
--- a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -151,24 +151,27 @@ class MainActivity : AppCompatActivity() {
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (kws.isReady(stream)) {
kws.decode(stream)
- }
- val text = kws.getResult(stream).keyword
+ val text = kws.getResult(stream).keyword
+
+ var textToDisplay = lastText
- var textToDisplay = lastText
+ if (text.isNotBlank()) {
+ // Remember to reset the stream right after detecting a keyword
- if (text.isNotBlank()) {
- if (lastText.isBlank()) {
- textToDisplay = "$idx: $text"
- } else {
- textToDisplay = "$idx: $text\n$lastText"
+ kws.reset(stream)
+ if (lastText.isBlank()) {
+ textToDisplay = "$idx: $text"
+ } else {
+ textToDisplay = "$idx: $text\n$lastText"
+ }
+ lastText = "$idx: $text\n$lastText"
+ idx += 1
}
- lastText = "$idx: $text\n$lastText"
- idx += 1
- }
- runOnUiThread {
- textView.text = textToDisplay
+ runOnUiThread {
+ textView.text = textToDisplay
+ }
}
}
}
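The hunk above is easier to read in its assembled form: the keyword result is now fetched inside the decode loop, and the stream is reset as soon as a keyword is detected. A sketch of the resulting loop, put together from the added lines (`kws`, `stream`, `samples`, `sampleRateInHz`, `lastText`, `idx`, and `textView` are the demo's own variables from that file):

```kotlin
// Sketch of the decoding loop after this change (SherpaOnnxKws MainActivity.kt).
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (kws.isReady(stream)) {
    kws.decode(stream)
    val text = kws.getResult(stream).keyword

    var textToDisplay = lastText
    if (text.isNotBlank()) {
        // Remember to reset the stream right after detecting a keyword
        kws.reset(stream)
        textToDisplay = if (lastText.isBlank()) "$idx: $text" else "$idx: $text\n$lastText"
        lastText = "$idx: $text\n$lastText"
        idx += 1
    }
    runOnUiThread {
        textView.text = textToDisplay
    }
}
```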
diff --git a/android/SherpaOnnxSpeakerDiarization/.gitignore b/android/SherpaOnnxSpeakerDiarization/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxSpeakerDiarization/app/.gitignore b/android/SherpaOnnxSpeakerDiarization/app/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts
new file mode 100644
index 0000000000..7a390ba425
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts
@@ -0,0 +1,71 @@
+plugins {
+ alias(libs.plugins.android.application)
+ alias(libs.plugins.jetbrains.kotlin.android)
+}
+
+android {
+ namespace = "com.k2fsa.sherpa.onnx.speaker.diarization"
+ compileSdk = 34
+
+ defaultConfig {
+ applicationId = "com.k2fsa.sherpa.onnx.speaker.diarization"
+ minSdk = 21
+ targetSdk = 34
+ versionCode = 1
+ versionName = "1.0"
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ vectorDrawables {
+ useSupportLibrary = true
+ }
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+ buildFeatures {
+ compose = true
+ }
+ composeOptions {
+ kotlinCompilerExtensionVersion = "1.5.1"
+ }
+ packaging {
+ resources {
+ excludes += "/META-INF/{AL2.0,LGPL2.1}"
+ }
+ }
+}
+
+dependencies {
+
+ implementation(libs.androidx.core.ktx)
+ implementation(libs.androidx.lifecycle.runtime.ktx)
+ implementation(libs.androidx.activity.compose)
+ implementation(platform(libs.androidx.compose.bom))
+ implementation(libs.androidx.ui)
+ implementation(libs.androidx.ui.graphics)
+ implementation(libs.androidx.ui.tooling.preview)
+ implementation(libs.androidx.material3)
+ implementation(libs.androidx.navigation.compose)
+ implementation(libs.androidx.documentfile)
+ testImplementation(libs.junit)
+ androidTestImplementation(libs.androidx.junit)
+ androidTestImplementation(libs.androidx.espresso.core)
+ androidTestImplementation(platform(libs.androidx.compose.bom))
+ androidTestImplementation(libs.androidx.ui.test.junit4)
+ debugImplementation(libs.androidx.ui.tooling)
+ debugImplementation(libs.androidx.ui.test.manifest)
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt
new file mode 100644
index 0000000000..53d7af15fc
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt
@@ -0,0 +1,24 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.test.platform.app.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+
+import org.junit.Test
+import org.junit.runner.RunWith
+
+import org.junit.Assert.*
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+@RunWith(AndroidJUnit4::class)
+class ExampleInstrumentedTest {
+ @Test
+ fun useAppContext() {
+ // Context of the app under test.
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext
+ assertEquals("com.k2fsa.sherpa.onnx.speaker.diarization", appContext.packageName)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..d58f7e8d77
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt
new file mode 100644
index 0000000000..0895cf52cf
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt
@@ -0,0 +1,13 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.compose.ui.graphics.vector.ImageVector
+
+data class BarItem(
+ val title: String,
+
+ // see https://www.composables.com/icons
+ // and
+ // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
+ val image: ImageVector,
+ val route: String,
+)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt
new file mode 100644
index 0000000000..7a25d49b9a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt
@@ -0,0 +1,132 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.os.Bundle
+import androidx.activity.ComponentActivity
+import androidx.activity.compose.setContent
+import androidx.activity.enableEdgeToEdge
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.CenterAlignedTopAppBar
+import androidx.compose.material3.ExperimentalMaterial3Api
+import androidx.compose.material3.Icon
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.NavigationBar
+import androidx.compose.material3.NavigationBarItem
+import androidx.compose.material3.Scaffold
+import androidx.compose.material3.Surface
+import androidx.compose.material3.Text
+import androidx.compose.material3.TopAppBarDefaults
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.tooling.preview.Preview
+import androidx.navigation.NavGraph.Companion.findStartDestination
+import androidx.navigation.NavHostController
+import androidx.navigation.compose.NavHost
+import androidx.navigation.compose.composable
+import androidx.navigation.compose.currentBackStackEntryAsState
+import androidx.navigation.compose.rememberNavController
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme
+
+const val TAG = "sherpa-onnx-sd"
+
+class MainActivity : ComponentActivity() {
+ override fun onCreate(savedInstanceState: Bundle?) {
+ super.onCreate(savedInstanceState)
+ enableEdgeToEdge()
+ setContent {
+ SherpaOnnxSpeakerDiarizationTheme {
+ // A surface container using the 'background' color from the theme
+ Surface(
+ modifier = Modifier.fillMaxSize(),
+ color = MaterialTheme.colorScheme.background
+ ) {
+ MainScreen()
+ }
+ }
+ }
+ SpeakerDiarizationObject.initSpeakerDiarization(this.assets)
+ }
+}
+
+@OptIn(ExperimentalMaterial3Api::class)
+@Composable
+fun MainScreen(modifier: Modifier = Modifier) {
+ val navController = rememberNavController()
+ Scaffold(
+ topBar = {
+ CenterAlignedTopAppBar(
+ colors = TopAppBarDefaults.topAppBarColors(
+ containerColor = MaterialTheme.colorScheme.primaryContainer,
+ titleContentColor = MaterialTheme.colorScheme.primary,
+ ),
+ title = {
+ Text(
+ "Next-gen Kaldi: Speaker Diarization",
+ fontWeight = FontWeight.Bold,
+ )
+ },
+ )
+ },
+ content = { padding ->
+ Column(Modifier.padding(padding)) {
+ NavigationHost(navController = navController)
+
+ }
+ },
+ bottomBar = {
+ BottomNavigationBar(navController = navController)
+ }
+ )
+}
+
+@Composable
+fun NavigationHost(navController: NavHostController) {
+ NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
+ composable(NavRoutes.Home.route) {
+ HomeScreen()
+ }
+
+ composable(NavRoutes.Help.route) {
+ HelpScreen()
+ }
+ }
+}
+
+@Composable
+fun BottomNavigationBar(navController: NavHostController) {
+ NavigationBar {
+ val backStackEntry by navController.currentBackStackEntryAsState()
+ val currentRoute = backStackEntry?.destination?.route
+
+ NavBarItems.BarItems.forEach { navItem ->
+ NavigationBarItem(selected = currentRoute == navItem.route,
+ onClick = {
+ navController.navigate(navItem.route) {
+ popUpTo(navController.graph.findStartDestination().id) {
+ saveState = true
+ }
+ launchSingleTop = true
+ restoreState = true
+ }
+ },
+ icon = {
+ Icon(imageVector = navItem.image, contentDescription = navItem.title)
+ }, label = {
+ Text(text = navItem.title)
+ })
+ }
+ }
+}
+
+@Preview(showBackground = true)
+@Composable
+fun MainScreenPreview() {
+ SherpaOnnxSpeakerDiarizationTheme {
+ MainScreen()
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
new file mode 100644
index 0000000000..65c737f971
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
@@ -0,0 +1,20 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.filled.Home
+import androidx.compose.material.icons.filled.Info
+
+object NavBarItems {
+ val BarItems = listOf(
+ BarItem(
+ title = "Home",
+ image = Icons.Filled.Home,
+ route = "home",
+ ),
+ BarItem(
+ title = "Help",
+ image = Icons.Filled.Info,
+ route = "help",
+ ),
+ )
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
new file mode 100644
index 0000000000..2e1ae90b51
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
@@ -0,0 +1,6 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+sealed class NavRoutes(val route: String) {
+ object Home : NavRoutes("home")
+ object Help : NavRoutes("help")
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
new file mode 120000
index 0000000000..459cc22ccd
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
new file mode 100644
index 0000000000..940a2b6434
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
@@ -0,0 +1,137 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.content.Context
+import android.media.AudioFormat
+import android.media.MediaCodec
+import android.media.MediaExtractor
+import android.media.MediaFormat
+import android.net.Uri
+
+data class WaveData(
+ val sampleRate: Int? = null,
+ val samples: FloatArray? = null,
+ val msg: String? = null
+)
+
+// It supports only 16-bit encoded wave files
+//
+// References
+// - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9
+// - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt
+fun readUri(context: Context, uri: Uri): WaveData {
+ val extractor = MediaExtractor()
+ extractor.setDataSource(context, uri, null)
+
+ val samplesList: MutableList<FloatArray> = ArrayList()
+
+ for (i in 0 until extractor.trackCount) {
+ val format = extractor.getTrackFormat(i)
+ val mime = format.getString(MediaFormat.KEY_MIME)
+ if (mime?.startsWith("audio/") == true) {
+ extractor.selectTrack(i)
+
+ var encoding: Int = -1
+ try {
+ encoding = format.getInteger(MediaFormat.KEY_PCM_ENCODING)
+ } catch (_: Exception) {
+ }
+
+ if (encoding != AudioFormat.ENCODING_PCM_16BIT) {
+ return WaveData(msg = "We support only 16-bit encoded wave files")
+ }
+
+ val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+ val decoder = MediaCodec.createDecoderByType(mime)
+ decoder.configure(format, null, null, 0)
+ decoder.start()
+
+ val inputBuffers = decoder.inputBuffers
+ var outputBuffers = decoder.outputBuffers
+
+ val info = MediaCodec.BufferInfo()
+ var eof = false
+
+ var outputBufferIndex = -1
+
+ while (true) {
+ if (!eof) {
+ val inputBufferIndex = decoder.dequeueInputBuffer(10000)
+ if (inputBufferIndex > 0) {
+ val size = extractor.readSampleData(inputBuffers[inputBufferIndex], 0)
+ if (size < 0) {
+ decoder.queueInputBuffer(
+ inputBufferIndex,
+ 0,
+ 0,
+ 0,
+ MediaCodec.BUFFER_FLAG_END_OF_STREAM
+ )
+ eof = true
+ } else {
+ decoder.queueInputBuffer(
+ inputBufferIndex,
+ 0,
+ size,
+ extractor.sampleTime,
+ 0
+ )
+ extractor.advance()
+ }
+ }
+ } // if (!eof)
+
+ if (outputBufferIndex >= 0) {
+ outputBuffers[outputBufferIndex].position(0)
+ }
+
+ outputBufferIndex = decoder.dequeueOutputBuffer(info, 10000)
+ if (outputBufferIndex >= 0) {
+ if (info.flags != 0) {
+ decoder.stop()
+ decoder.release()
+
+ var k = 0
+ for (s in samplesList) {
+ k += s.size
+ }
+ if (k == 0) {
+ return WaveData(msg = "Failed to read selected file")
+ }
+
+ val ans = FloatArray(k)
+ k = 0
+ for (s in samplesList) {
+ s.copyInto(ans, k)
+ k += s.size
+ }
+
+ return WaveData(sampleRate = sampleRate, samples = ans)
+ }
+
+ val buffer = outputBuffers[outputBufferIndex]
+ val chunk = ByteArray(info.size)
+ buffer[chunk]
+ buffer.clear()
+
+ val numSamples = info.size / 2
+
+ val samples = FloatArray(numSamples)
+ for (k in 0 until numSamples) {
+ // assume little endian; the low byte must be treated as unsigned
+ val s = (chunk[2 * k].toInt() and 0xff) + chunk[2 * k + 1] * 256.0f
+
+ samples[k] = s / 32768.0f
+ }
+ samplesList.add(samples)
+
+ decoder.releaseOutputBuffer(outputBufferIndex, false)
+ } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
+ outputBuffers = decoder.outputBuffers
+ }
+ }
+ }
+ }
+
+ extractor.release()
+ return WaveData(msg = "not an audio file")
+}
\ No newline at end of file
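A short, hypothetical call-site sketch for `readUri()`; in the actual demo the `Uri` comes from a file picker in `Home.kt`, so the helper name and the logging tag below are illustrative only:

```kotlin
import android.content.Context
import android.net.Uri
import android.util.Log

// Hypothetical call site: read a user-selected file and report what was decoded.
fun loadSelectedFile(context: Context, uri: Uri) {
    val data = readUri(context, uri)
    val msg = data.msg
    if (msg != null) {
        // e.g. "We support only 16-bit encoded wave files"
        Log.w("sherpa-onnx-sd", msg)
        return
    }
    val sampleRate = data.sampleRate ?: return
    val samples = data.samples ?: return
    Log.i("sherpa-onnx-sd", "Read ${samples.size} samples at $sampleRate Hz")
}
```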
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
new file mode 100644
index 0000000000..9df6bd5616
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
@@ -0,0 +1,67 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.content.res.AssetManager
+import android.util.Log
+import com.k2fsa.sherpa.onnx.FastClusteringConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig
+import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig
+
+// Please download
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+// then untar it, rename model.onnx to segmentation.onnx, and move
+// segmentation.onnx to the assets folder
+val segmentationModel = "segmentation.onnx"
+
+// please download it from
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+// and rename it to embedding.onnx
+// and move it to the assets folder
+val embeddingModel = "embedding.onnx"
+
+// in the end, your assets folder should look like below
+/*
+(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
+(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
+total 89048
+-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 embedding.onnx
+-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
+ */
+
+object SpeakerDiarizationObject {
+ var _sd: OfflineSpeakerDiarization? = null
+ val sd: OfflineSpeakerDiarization
+ get() {
+ return _sd!!
+ }
+
+ fun initSpeakerDiarization(assetManager: AssetManager? = null) {
+ synchronized(this) {
+ if (_sd != null) {
+ return
+ }
+ Log.i(TAG, "Initializing sherpa-onnx speaker diarization")
+
+ val config = OfflineSpeakerDiarizationConfig(
+ segmentation = OfflineSpeakerSegmentationModelConfig(
+ pyannote = OfflineSpeakerSegmentationPyannoteModelConfig(
+ segmentationModel
+ ),
+ debug = true,
+ ),
+ embedding = SpeakerEmbeddingExtractorConfig(
+ model = embeddingModel,
+ debug = true,
+ numThreads = 2,
+ ),
+ clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f),
+ minDurationOn = 0.2f,
+ minDurationOff = 0.5f,
+ )
+ _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
+ }
+ }
+}
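A hedged usage sketch for the object above. It assumes the kotlin-api class symlinked into this project (`OfflineSpeakerDiarization.kt`) exposes a `process()` method that returns segments with `start`, `end`, and `speaker` fields, as the repo's other speaker-diarization examples do; treat the exact signatures as assumptions and verify them against that file:

```kotlin
// Assumed API: sd.process(FloatArray) returns segments with start/end/speaker fields.
// The samples are expected to be mono float audio at the diarization sample rate.
fun runDiarization(samples: FloatArray): String {
    val sd = SpeakerDiarizationObject.sd
    val segments = sd.process(samples)
    return segments.joinToString("\n") { s ->
        String.format("%.2f -- %.2f speaker_%02d", s.start, s.end, s.speaker)
    }
}
```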
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..9bab8fe88a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
new file mode 100644
index 0000000000..b3640b9e97
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
@@ -0,0 +1,38 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.height
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+
+@Composable
+fun HelpScreen() {
+ Box(modifier = Modifier.fillMaxSize()) {
+ Column(
+ modifier = Modifier.padding(8.dp)
+ ) {
+ Text(
+                "This app accepts only 16kHz, 16-bit, single-channel *.wav files. " +
+                        "It has two settings: the number of speakers and the clustering threshold. " +
+                        "If you know the actual number of speakers in the file, please set it. " +
+                        "Otherwise, set it to 0; in that case you have to set the clustering threshold. " +
+                        "A larger threshold leads to fewer detected speakers."
+ )
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("The speaker segmentation model is from " +
+ "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), "+
+ "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)")
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("Everything is open-sourced!", fontSize = 20.sp)
+ }
+ }
+}
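
In code, the two settings described by this help text end up in FastClusteringConfig. A small hedged sketch of the mapping (the same assignments appear in Home.kt below; `knownSpeakers` is a hypothetical input, and "threshold is only relevant when the count is not fixed" is an inference from the help text, not something the code states):

```kotlin
// knownSpeakers > 0 when the user knows how many speakers are in the file.
val clustering = if (knownSpeakers > 0) {
    FastClusteringConfig(numClusters = knownSpeakers, threshold = 0.5f)
} else {
    // Unknown count: clustering is driven by the threshold; larger => fewer speakers.
    FastClusteringConfig(numClusters = 0, threshold = 0.7f)
}
```
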
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
new file mode 100644
index 0000000000..a5a9cd31c9
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
@@ -0,0 +1,210 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.util.Log
+import androidx.activity.compose.rememberLauncherForActivityResult
+import androidx.activity.result.contract.ActivityResultContracts
+import androidx.compose.foundation.layout.Arrangement
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Row
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxWidth
+import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
+import androidx.compose.foundation.rememberScrollState
+import androidx.compose.foundation.verticalScroll
+import androidx.compose.material3.Button
+import androidx.compose.material3.OutlinedTextField
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.runtime.mutableStateOf
+import androidx.compose.runtime.remember
+import androidx.compose.runtime.setValue
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.platform.LocalClipboardManager
+import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.text.AnnotatedString
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+import androidx.documentfile.provider.DocumentFile
+import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject
+import com.k2fsa.sherpa.onnx.speaker.diarization.TAG
+import kotlin.concurrent.thread
+
+
+private var samples: FloatArray? = null
+
+@Composable
+fun HomeScreen() {
+ val context = LocalContext.current
+
+ var sampleRate: Int
+ var filename by remember { mutableStateOf("") }
+ var status by remember { mutableStateOf("") }
+ var progress by remember { mutableStateOf("") }
+ val clipboardManager = LocalClipboardManager.current
+ var done by remember { mutableStateOf(false) }
+ var fileIsOk by remember { mutableStateOf(false) }
+ var started by remember { mutableStateOf(false) }
+ var numSpeakers by remember { mutableStateOf(0) }
+ var threshold by remember { mutableStateOf(0.5f) }
+
+
+    // Progress callback: reports the percentage of processed chunks and returns 0.
+    val callback = here@{ numProcessedChunks: Int, numTotalChunks: Int, arg: Long ->
+        val percent = 100.0 * numProcessedChunks / numTotalChunks
+        progress = "%.2f%%".format(percent)
+        Log.i(TAG, progress)
+        return@here 0
+    }
+
+ val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) {
+ it?.let {
+ val documentFile = DocumentFile.fromSingleUri(context, it)
+ filename = documentFile?.name ?: ""
+
+ progress = ""
+ done = false
+ fileIsOk = false
+
+ if (filename.isNotEmpty()) {
+ val data = readUri(context, it)
+ Log.i(TAG, "sample rate: ${data.sampleRate}")
+ Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}")
+ if (data.msg != null) {
+ Log.i(TAG, "failed to read $filename")
+ status = data.msg
+ } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) {
+ status =
+ "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}"
+ } else {
+ samples = data.samples!!
+ fileIsOk = true
+ }
+ }
+ }
+ }
+
+ Column(
+ modifier = Modifier.padding(10.dp),
+ verticalArrangement = Arrangement.Top,
+ ) {
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+
+ Button(onClick = {
+ launcher.launch(arrayOf("audio/*"))
+ }) {
+ Text("Select a .wav file")
+ }
+
+ Button(enabled = fileIsOk && !started,
+ onClick = {
+ Log.i(TAG, "started")
+ Log.i(TAG, "num samples: ${samples?.size}")
+ started = true
+ progress = ""
+
+ val config = SpeakerDiarizationObject.sd.config
+ config.clustering.numClusters = numSpeakers
+ config.clustering.threshold = threshold
+
+ SpeakerDiarizationObject.sd.setConfig(config)
+
+ thread(true) {
+ done = false
+ status = "Started! Please wait"
+ val segments = SpeakerDiarizationObject.sd.processWithCallback(
+ samples!!,
+ callback = callback,
+ )
+ done = true
+ started = false
+ status = ""
+ for (s in segments) {
+ val start = "%.2f".format(s.start)
+ val end = "%.2f".format(s.end)
+ val speaker = "speaker_%02d".format(s.speaker)
+ status += "$start -- $end $speaker\n"
+ Log.i(TAG, "$start -- $end $speaker")
+ }
+
+ Log.i(TAG, status)
+ }
+ }) {
+ Text("Start")
+ }
+ if (progress.isNotEmpty()) {
+ Text(progress, fontSize = 25.sp)
+ }
+ }
+
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+ OutlinedTextField(
+ value = numSpeakers.toString(),
+ onValueChange = {
+                    if (it.isBlank()) {
+ numSpeakers = 0
+ } else {
+ numSpeakers = it.toIntOrNull() ?: 0
+ }
+ },
+ label = {
+ Text("Number of Speakers")
+ },
+ )
+ }
+
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+ OutlinedTextField(
+ value = threshold.toString(),
+ onValueChange = {
+                    if (it.isBlank()) {
+ threshold = 0.5f
+ } else {
+ threshold = it.toFloatOrNull() ?: 0.5f
+ }
+ },
+ label = {
+ Text("Clustering threshold")
+ },
+ )
+ }
+
+ if (filename.isNotEmpty()) {
+ Text(text = "Selected $filename")
+ Spacer(Modifier.size(20.dp))
+ }
+
+ if (done) {
+ Button(onClick = {
+ clipboardManager.setText(AnnotatedString(status))
+ progress = "Copied!"
+ }) {
+ Text("Copy result")
+ }
+ Spacer(Modifier.size(20.dp))
+ }
+
+ if (status.isNotEmpty()) {
+ Text(
+ status,
+ modifier = Modifier.verticalScroll(rememberScrollState()),
+ )
+ }
+
+
+ }
+}
\ No newline at end of file
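
Home.kt calls a readUri() helper (defined elsewhere in this app, not in this diff) that returns the decoded samples, the sample rate, and an optional error message. Purely to illustrate the 16 kHz / 16-bit / mono requirement from the help screen, here is a hypothetical header check; the names and the helper itself are assumptions, not the app's actual reader.

```kotlin
import java.io.InputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder

// Hypothetical sketch: reads just enough of a canonical RIFF/WAVE header to
// validate "16 kHz, 16-bit, mono". Real files may carry extra chunks before
// "fmt ", which this simplified check does not handle.
data class WavInfo(val channels: Int, val sampleRate: Int, val bitsPerSample: Int)

fun readWavInfo(stream: InputStream): WavInfo? {
    val header = ByteArray(36)
    if (stream.read(header) != header.size) return null
    if (header.copyOfRange(0, 4).decodeToString() != "RIFF") return null
    if (header.copyOfRange(8, 12).decodeToString() != "WAVE") return null
    val buf = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
    return WavInfo(
        channels = buf.getShort(22).toInt(),      // 1 expected
        sampleRate = buf.getInt(24),              // compared against sd.sampleRate()
        bitsPerSample = buf.getShort(34).toInt(), // 16 expected
    )
}
```

The app itself rejects files whose sample rate differs from SpeakerDiarizationObject.sd.sampleRate(), which remains the authoritative check.
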
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
new file mode 100644
index 0000000000..a96515d3da
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
new file mode 100644
index 0000000000..5dbbe7e59e
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
@@ -0,0 +1,58 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.platform.LocalContext
+
+private val DarkColorScheme = darkColorScheme(
+ primary = Purple80,
+ secondary = PurpleGrey80,
+ tertiary = Pink80
+)
+
+private val LightColorScheme = lightColorScheme(
+ primary = Purple40,
+ secondary = PurpleGrey40,
+ tertiary = Pink40
+
+ /* Other default colors to override
+ background = Color(0xFFFFFBFE),
+ surface = Color(0xFFFFFBFE),
+ onPrimary = Color.White,
+ onSecondary = Color.White,
+ onTertiary = Color.White,
+ onBackground = Color(0xFF1C1B1F),
+ onSurface = Color(0xFF1C1B1F),
+ */
+)
+
+@Composable
+fun SherpaOnnxSpeakerDiarizationTheme(
+ darkTheme: Boolean = isSystemInDarkTheme(),
+ // Dynamic color is available on Android 12+
+ dynamicColor: Boolean = true,
+ content: @Composable () -> Unit
+) {
+ val colorScheme = when {
+ dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
+ val context = LocalContext.current
+ if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
+ }
+
+ darkTheme -> DarkColorScheme
+ else -> LightColorScheme
+ }
+
+ MaterialTheme(
+ colorScheme = colorScheme,
+ typography = Typography,
+ content = content
+ )
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
new file mode 100644
index 0000000000..39a81b9418
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
@@ -0,0 +1,34 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.material3.Typography
+import androidx.compose.ui.text.TextStyle
+import androidx.compose.ui.text.font.FontFamily
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.unit.sp
+
+// Set of Material typography styles to start with
+val Typography = Typography(
+ bodyLarge = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Normal,
+ fontSize = 16.sp,
+ lineHeight = 24.sp,
+ letterSpacing = 0.5.sp
+ )
+ /* Other default text styles to override
+ titleLarge = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Normal,
+ fontSize = 22.sp,
+ lineHeight = 28.sp,
+ letterSpacing = 0.sp
+ ),
+ labelSmall = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Medium,
+ fontSize = 11.sp,
+ lineHeight = 16.sp,
+ letterSpacing = 0.5.sp
+ )
+ */
+)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new file mode 100644
index 0000000000..2b068d1146
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000000..07d5da9cbf
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 0000000000..6f3b755bf5
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 0000000000..6f3b755bf5
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp
new file mode 100644
index 0000000000..c209e78ecd
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..b2dfe3d1ba
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp
new file mode 100644
index 0000000000..4f0f1d64e5
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..62b611da08
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..948a3070fe
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..1b9a6956b3
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..28d4b77f9f
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9287f50836
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..aa7d6427e6
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9126ae37cb
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml
new file mode 100644
index 0000000000..f8c6127d32
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="purple_200">#FFBB86FC</color>
+    <color name="purple_500">#FF6200EE</color>
+    <color name="purple_700">#FF3700B3</color>
+    <color name="teal_200">#FF03DAC5</color>
+    <color name="teal_700">#FF018786</color>
+    <color name="black">#FF000000</color>
+    <color name="white">#FFFFFFFF</color>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml
new file mode 100644
index 0000000000..05f2df0901
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml
@@ -0,0 +1,3 @@
+<resources>
+    <string name="app_name">SherpaOnnxSpeakerDiarization</string>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml
new file mode 100644
index 0000000000..34d1d96ed3
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+
+    <style name="Theme.SherpaOnnxSpeakerDiarization" parent="android:Theme.Material.Light.NoActionBar" />
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml
new file mode 100644
index 0000000000..fa0f996d2c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml
@@ -0,0 +1,13 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml
new file mode 100644
index 0000000000..9ee9997b0b
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt
new file mode 100644
index 0000000000..5571dbb56c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt
@@ -0,0 +1,17 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import org.junit.Test
+
+import org.junit.Assert.*
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+class ExampleUnitTest {
+ @Test
+ fun addition_isCorrect() {
+ assertEquals(4, 2 + 2)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/build.gradle.kts
new file mode 100644
index 0000000000..f74b04bf26
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/build.gradle.kts
@@ -0,0 +1,5 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ alias(libs.plugins.android.application) apply false
+ alias(libs.plugins.jetbrains.kotlin.android) apply false
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle.properties b/android/SherpaOnnxSpeakerDiarization/gradle.properties
new file mode 100644
index 0000000000..20e2a01520
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle.properties
@@ -0,0 +1,23 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml b/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml
new file mode 100644
index 0000000000..fe93be92df
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml
@@ -0,0 +1,35 @@
+[versions]
+agp = "8.4.0"
+kotlin = "1.9.0"
+coreKtx = "1.10.1"
+junit = "4.13.2"
+junitVersion = "1.1.5"
+espressoCore = "3.5.1"
+lifecycleRuntimeKtx = "2.6.1"
+activityCompose = "1.8.0"
+composeBom = "2023.08.00"
+navigationCompose = "2.8.2"
+documentfile = "1.0.1"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
+androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
+androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
+androidx-ui = { group = "androidx.compose.ui", name = "ui" }
+androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
+androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
+androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
+androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
+androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
+androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
+androidx-navigation-compose = { group = "androidx.navigation", name = "navigation-compose", version.ref = "navigationCompose" }
+androidx-documentfile = { group = "androidx.documentfile", name = "documentfile", version.ref = "documentfile" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+
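
For context on how this catalog is consumed: the alias() lines below match the root build.gradle.kts added in this PR, while the dependencies block is an illustrative sketch (the module's actual build.gradle.kts is not shown in this part of the diff), using the accessors Gradle derives from the keys above.

```kotlin
// Sketch of referencing the version catalog from a module's build.gradle.kts.
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.jetbrains.kotlin.android)
}

dependencies {
    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom)) // the BOM pins the ui/material3 artifacts
    implementation(libs.androidx.material3)
    implementation(libs.androidx.navigation.compose)
    implementation(libs.androidx.documentfile)
    testImplementation(libs.junit)
}
```
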
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..a46693001c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Oct 12 14:27:04 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/android/SherpaOnnxSpeakerDiarization/gradlew b/android/SherpaOnnxSpeakerDiarization/gradlew
new file mode 100755
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxSpeakerDiarization/gradlew.bat b/android/SherpaOnnxSpeakerDiarization/gradlew.bat
new file mode 100644
index 0000000000..ac1b06f938
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts b/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts
new file mode 100644
index 0000000000..7226b5499a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts
@@ -0,0 +1,23 @@
+pluginManagement {
+ repositories {
+ google {
+ content {
+ includeGroupByRegex("com\\.android.*")
+ includeGroupByRegex("com\\.google.*")
+ includeGroupByRegex("androidx.*")
+ }
+ }
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+}
+
+rootProject.name = "SherpaOnnxSpeakerDiarization"
+include(":app")
diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..9bab8fe88a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index b95ad7d787..99e49e782d 100644
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -183,6 +183,9 @@ class MainActivity : AppCompatActivity() {
private fun initTts() {
var modelDir: String?
var modelName: String?
+ var acousticModelName: String?
+ var vocoder: String?
+ var voices: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
@@ -193,8 +196,22 @@ class MainActivity : AppCompatActivity() {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
- modelDir = null
+
+ // VITS -- begin
modelName = null
+ // VITS -- end
+
+ // Matcha -- begin
+ acousticModelName = null
+ vocoder = null
+ // Matcha -- end
+
+ // For Kokoro -- begin
+ voices = null
+ // For Kokoro -- end
+
+
+ modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
@@ -217,7 +234,6 @@ class MainActivity : AppCompatActivity() {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
- // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
@@ -233,24 +249,67 @@ class MainActivity : AppCompatActivity() {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
+ // Example 6
+ // vits-melo-tts-zh_en
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
+ // modelDir = "vits-melo-tts-zh_en"
+ // modelName = "model.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "vits-melo-tts-zh_en/dict"
+
+ // Example 7
+ // matcha-icefall-zh-baker
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+ // modelDir = "matcha-icefall-zh-baker"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "matcha-icefall-zh-baker/dict"
+
+ // Example 8
+ // matcha-icefall-en_US-ljspeech
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+ // modelDir = "matcha-icefall-en_US-ljspeech"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
+
+ // Example 9
+ // kokoro-en-v0_19
+ // modelDir = "kokoro-en-v0_19"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-en-v0_19/espeak-ng-data"
+
+ // Example 10
+ // kokoro-multi-lang-v1_0
+ // modelDir = "kokoro-multi-lang-v1_0"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
+ // dictDir = "kokoro-multi-lang-v1_0/dict"
+ // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
+ // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
+
if (dataDir != null) {
- val newDir = copyDataDir(modelDir!!)
- modelDir = newDir + "/" + modelDir
- dataDir = newDir + "/" + dataDir
- assets = null
+ val newDir = copyDataDir(dataDir!!)
+ dataDir = "$newDir/$dataDir"
}
if (dictDir != null) {
- val newDir = copyDataDir(modelDir!!)
- modelDir = newDir + "/" + modelDir
- dictDir = modelDir + "/" + "dict"
- ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
- assets = null
+ val newDir = copyDataDir(dictDir!!)
+ dictDir = "$newDir/$dictDir"
+ if (ruleFsts == null) {
+ ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
+ }
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!,
- modelName = modelName!!,
+ modelName = modelName ?: "",
+ acousticModelName = acousticModelName ?: "",
+ vocoder = vocoder ?: "",
+ voices = voices ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
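
To make the selection above concrete: exactly one model family is enabled by filling in its variables and leaving the rest null (they are passed to getOfflineTtsConfig as empty strings). A sketch that enables Example 7, with values copied verbatim from the commented block above:

```kotlin
// Matcha example (values from Example 7); modelName and voices stay null,
// so the VITS and Kokoro fields end up as empty strings in the config.
modelDir = "matcha-icefall-zh-baker"
acousticModelName = "model-steps-3.onnx"
vocoder = "hifigan_v2.onnx"
lexicon = "lexicon.txt"
dictDir = "matcha-icefall-zh-baker/dict"
```
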
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
deleted file mode 100644
index 4f9c4b6f6b..0000000000
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright (c) 2023 Xiaomi Corporation
-package com.k2fsa.sherpa.onnx
-
-import android.content.res.AssetManager
-
-data class OfflineTtsVitsModelConfig(
- var model: String,
- var lexicon: String = "",
- var tokens: String,
- var dataDir: String = "",
- var dictDir: String = "",
- var noiseScale: Float = 0.667f,
- var noiseScaleW: Float = 0.8f,
- var lengthScale: Float = 1.0f,
-)
-
-data class OfflineTtsModelConfig(
- var vits: OfflineTtsVitsModelConfig,
- var numThreads: Int = 1,
- var debug: Boolean = false,
- var provider: String = "cpu",
-)
-
-data class OfflineTtsConfig(
- var model: OfflineTtsModelConfig,
- var ruleFsts: String = "",
- var ruleFars: String = "",
- var maxNumSentences: Int = 1,
-)
-
-class GeneratedAudio(
- val samples: FloatArray,
- val sampleRate: Int,
-) {
- fun save(filename: String) =
- saveImpl(filename = filename, samples = samples, sampleRate = sampleRate)
-
- private external fun saveImpl(
- filename: String,
- samples: FloatArray,
- sampleRate: Int
- ): Boolean
-}
-
-class OfflineTts(
- assetManager: AssetManager? = null,
- var config: OfflineTtsConfig,
-) {
- private var ptr: Long
-
- init {
- ptr = if (assetManager != null) {
- newFromAsset(assetManager, config)
- } else {
- newFromFile(config)
- }
- }
-
- fun sampleRate() = getSampleRate(ptr)
-
- fun numSpeakers() = getNumSpeakers(ptr)
-
- fun generate(
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f
- ): GeneratedAudio {
- val objArray = generateImpl(ptr, text = text, sid = sid, speed = speed)
- return GeneratedAudio(
- samples = objArray[0] as FloatArray,
- sampleRate = objArray[1] as Int
- )
- }
-
- fun generateWithCallback(
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f,
- callback: (samples: FloatArray) -> Int
- ): GeneratedAudio {
- val objArray = generateWithCallbackImpl(
- ptr,
- text = text,
- sid = sid,
- speed = speed,
- callback = callback
- )
- return GeneratedAudio(
- samples = objArray[0] as FloatArray,
- sampleRate = objArray[1] as Int
- )
- }
-
- fun allocate(assetManager: AssetManager? = null) {
- if (ptr == 0L) {
- ptr = if (assetManager != null) {
- newFromAsset(assetManager, config)
- } else {
- newFromFile(config)
- }
- }
- }
-
- fun free() {
- if (ptr != 0L) {
- delete(ptr)
- ptr = 0
- }
- }
-
- protected fun finalize() {
- if (ptr != 0L) {
- delete(ptr)
- ptr = 0
- }
- }
-
- fun release() = finalize()
-
- private external fun newFromAsset(
- assetManager: AssetManager,
- config: OfflineTtsConfig,
- ): Long
-
- private external fun newFromFile(
- config: OfflineTtsConfig,
- ): Long
-
- private external fun delete(ptr: Long)
- private external fun getSampleRate(ptr: Long): Int
- private external fun getNumSpeakers(ptr: Long): Int
-
- // The returned array has two entries:
- // - the first entry is an 1-D float array containing audio samples.
- // Each sample is normalized to the range [-1, 1]
- // - the second entry is the sample rate
- private external fun generateImpl(
- ptr: Long,
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f
-    ): Array<Any>
-
- private external fun generateWithCallbackImpl(
- ptr: Long,
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f,
- callback: (samples: FloatArray) -> Int
-    ): Array<Any>
-
- companion object {
- init {
- System.loadLibrary("sherpa-onnx-jni")
- }
- }
-}
-
-// please refer to
-// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
-// to download models
-fun getOfflineTtsConfig(
- modelDir: String,
- modelName: String,
- lexicon: String,
- dataDir: String,
- dictDir: String,
- ruleFsts: String,
- ruleFars: String
-): OfflineTtsConfig {
- return OfflineTtsConfig(
- model = OfflineTtsModelConfig(
- vits = OfflineTtsVitsModelConfig(
- model = "$modelDir/$modelName",
- lexicon = "$modelDir/$lexicon",
- tokens = "$modelDir/tokens.txt",
- dataDir = dataDir,
- dictDir = dictDir,
- ),
- numThreads = 2,
- debug = true,
- provider = "cpu",
- ),
- ruleFsts = ruleFsts,
- ruleFars = ruleFars,
- )
-}
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
new file mode 120000
index 0000000000..f1392e7712
--- /dev/null
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
index a01e0a7b6d..e372be4329 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
@@ -47,7 +47,7 @@ fun getSampleText(lang: String): String {
}
"eng" -> {
- text = "This is a text-to-speech engine using next generation Kaldi"
+ text = "How are you doing today? This is a text-to-speech engine using next generation Kaldi"
}
"est" -> {
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
index 9a6bd47aba..c96f9f0efc 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
@@ -3,6 +3,10 @@
package com.k2fsa.sherpa.onnx.tts.engine
import PreferenceHelper
+import android.media.AudioAttributes
+import android.media.AudioFormat
+import android.media.AudioManager
+import android.media.AudioTrack
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
@@ -18,7 +22,9 @@ import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.wrapContentHeight
+import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.text.KeyboardOptions
+import androidx.compose.foundation.verticalScroll
import androidx.compose.material3.Button
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
@@ -36,7 +42,13 @@ import androidx.compose.ui.Modifier
import androidx.compose.ui.text.input.KeyboardType
import androidx.compose.ui.unit.dp
import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.channels.Channel
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
import java.io.File
+import kotlin.time.TimeSource
const val TAG = "sherpa-onnx-tts-engine"
@@ -45,9 +57,26 @@ class MainActivity : ComponentActivity() {
private val ttsViewModel: TtsViewModel by viewModels()
private var mediaPlayer: MediaPlayer? = null
+
+ // see
+ // https://developer.android.com/reference/kotlin/android/media/AudioTrack
+ private lateinit var track: AudioTrack
+
+ private var stopped: Boolean = false
+
+    private var samplesChannel = Channel<FloatArray>()
+
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
+
+ Log.i(TAG, "Start to initialize TTS")
TtsEngine.createTts(this)
+ Log.i(TAG, "Finish initializing TTS")
+
+ Log.i(TAG, "Start to initialize AudioTrack")
+ initAudioTrack()
+ Log.i(TAG, "Finish initializing AudioTrack")
+
val preferenceHelper = PreferenceHelper(this)
setContent {
SherpaOnnxTtsEngineTheme {
@@ -57,7 +86,7 @@ class MainActivity : ComponentActivity() {
color = MaterialTheme.colorScheme.background
) {
Scaffold(topBar = {
- TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
+ TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
}) {
Box(modifier = Modifier.padding(it)) {
Column(modifier = Modifier.padding(16.dp)) {
@@ -65,8 +94,8 @@ class MainActivity : ComponentActivity() {
Text("Speed " + String.format("%.1f", TtsEngine.speed))
Slider(
value = TtsEngine.speedState.value,
- onValueChange = {
- TtsEngine.speed = it
+ onValueChange = {
+ TtsEngine.speed = it
preferenceHelper.setSpeed(it)
},
valueRange = 0.2F..3.0F,
@@ -77,6 +106,12 @@ class MainActivity : ComponentActivity() {
val testTextContent = getSampleText(TtsEngine.lang ?: "")
var testText by remember { mutableStateOf(testTextContent) }
+ var startEnabled by remember { mutableStateOf(true) }
+ var playEnabled by remember { mutableStateOf(false) }
+ var rtfText by remember {
+ mutableStateOf("")
+ }
+ val scrollState = rememberScrollState(0)
val numSpeakers = TtsEngine.tts!!.numSpeakers()
if (numSpeakers > 1) {
@@ -110,59 +145,128 @@ class MainActivity : ComponentActivity() {
value = testText,
onValueChange = { testText = it },
label = { Text("Please input your text here") },
+ maxLines = 10,
modifier = Modifier
.fillMaxWidth()
.padding(bottom = 16.dp)
+ .verticalScroll(scrollState)
.wrapContentHeight(),
singleLine = false,
)
Row {
Button(
- modifier = Modifier.padding(20.dp),
+ enabled = startEnabled,
+ modifier = Modifier.padding(5.dp),
onClick = {
Log.i(TAG, "Clicked, text: $testText")
if (testText.isBlank() || testText.isEmpty()) {
Toast.makeText(
applicationContext,
- "Please input a test sentence",
+ "Please input some text to generate",
Toast.LENGTH_SHORT
).show()
} else {
- val audio = TtsEngine.tts!!.generate(
- text = testText,
- sid = TtsEngine.speakerId,
- speed = TtsEngine.speed,
- )
-
- val filename =
- application.filesDir.absolutePath + "/generated.wav"
- val ok =
- audio.samples.isNotEmpty() && audio.save(filename)
-
- if (ok) {
- stopMediaPlayer()
- mediaPlayer = MediaPlayer.create(
- applicationContext,
- Uri.fromFile(File(filename))
- )
- mediaPlayer?.start()
- } else {
- Log.i(TAG, "Failed to generate or save audio")
+ startEnabled = false
+ playEnabled = false
+ stopped = false
+
+ track.pause()
+ track.flush()
+ track.play()
+ rtfText = ""
+ Log.i(TAG, "Started with text $testText")
+
+                                            samplesChannel = Channel<FloatArray>()
+
+ CoroutineScope(Dispatchers.IO).launch {
+ for (samples in samplesChannel) {
+ track.write(
+ samples,
+ 0,
+ samples.size,
+ AudioTrack.WRITE_BLOCKING
+ )
+ if (stopped) {
+ break
+ }
+ }
}
+
+ CoroutineScope(Dispatchers.Default).launch {
+ val timeSource = TimeSource.Monotonic
+ val startTime = timeSource.markNow()
+
+ val audio =
+ TtsEngine.tts!!.generateWithCallback(
+ text = testText,
+ sid = TtsEngine.speakerId,
+ speed = TtsEngine.speed,
+ callback = ::callback,
+ )
+
+                                            val elapsed =
+                                                startTime.elapsedNow().inWholeMilliseconds.toFloat() / 1000
+                                            val audioDuration =
+                                                audio.samples.size / TtsEngine.tts!!.sampleRate()
+                                                    .toFloat()
+                                            val RTF = String.format(
+                                                "Number of threads: %d\nElapsed: %.3f s\nAudio duration: %.3f s\nRTF: %.3f/%.3f = %.3f",
+                                                TtsEngine.tts!!.config.model.numThreads,
+                                                elapsed,
+                                                audioDuration,
+                                                elapsed,
+                                                audioDuration,
+                                                elapsed / audioDuration
+                                            )
+ samplesChannel.close()
+
+ val filename =
+ application.filesDir.absolutePath + "/generated.wav"
+
+
+ val ok =
+ audio.samples.isNotEmpty() && audio.save(
+ filename
+ )
+
+ if (ok) {
+ withContext(Dispatchers.Main) {
+ startEnabled = true
+ playEnabled = true
+ rtfText = RTF
+ }
+ }
+ }.start()
}
}) {
- Text("Test")
+ Text("Start")
+ }
+
+ Button(
+ modifier = Modifier.padding(5.dp),
+ enabled = playEnabled,
+ onClick = {
+ stopped = true
+ track.pause()
+ track.flush()
+ onClickPlay()
+ }) {
+ Text("Play")
}
Button(
- modifier = Modifier.padding(20.dp),
+ modifier = Modifier.padding(5.dp),
onClick = {
- TtsEngine.speakerId = 0
- TtsEngine.speed = 1.0f
- testText = ""
+ onClickStop()
+ startEnabled = true
}) {
- Text("Reset")
+ Text("Stop")
+ }
+ }
+ if (rtfText.isNotEmpty()) {
+ Row {
+ Text(rtfText)
}
}
}
@@ -183,4 +287,63 @@ class MainActivity : ComponentActivity() {
mediaPlayer?.release()
mediaPlayer = null
}
+
+ private fun onClickPlay() {
+ val filename = application.filesDir.absolutePath + "/generated.wav"
+ stopMediaPlayer()
+ mediaPlayer = MediaPlayer.create(
+ applicationContext,
+ Uri.fromFile(File(filename))
+ )
+ mediaPlayer?.start()
+ }
+
+ private fun onClickStop() {
+ stopped = true
+ track.pause()
+ track.flush()
+
+ stopMediaPlayer()
+ }
+
+ // this function is called from C++
+ private fun callback(samples: FloatArray): Int {
+ if (!stopped) {
+ val samplesCopy = samples.copyOf()
+ CoroutineScope(Dispatchers.IO).launch {
+ samplesChannel.send(samplesCopy)
+ }
+ return 1
+ } else {
+ track.stop()
+            Log.i(TAG, "stopped; callback returns 0")
+ return 0
+ }
+ }
+
+ private fun initAudioTrack() {
+ val sampleRate = TtsEngine.tts!!.sampleRate()
+ val bufLength = AudioTrack.getMinBufferSize(
+ sampleRate,
+ AudioFormat.CHANNEL_OUT_MONO,
+ AudioFormat.ENCODING_PCM_FLOAT
+ )
+        Log.i(TAG, "sampleRate: $sampleRate, bufLength: $bufLength")
+
+ val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+ .setUsage(AudioAttributes.USAGE_MEDIA)
+ .build()
+
+ val format = AudioFormat.Builder()
+ .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
+ .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
+ .setSampleRate(sampleRate)
+ .build()
+
+ track = AudioTrack(
+ attr, format, bufLength, AudioTrack.MODE_STREAM,
+ AudioManager.AUDIO_SESSION_ID_GENERATE
+ )
+ track.play()
+ }
}
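
The streaming changes above follow one pattern: the TTS engine invokes callback(samples) after each generated chunk, the callback forwards a copy of the chunk into a Channel, and a coroutine drains the channel into the AudioTrack; as used here, returning 1 keeps generation going and returning 0 (after Stop) ends it. A condensed sketch of that pattern, using only calls that appear in this diff and assuming `track` is the AudioTrack set up in initAudioTrack() and the same coroutine imports as MainActivity.kt:

```kotlin
// Condensed sketch of the producer/consumer flow added above.
val channel = Channel<FloatArray>()

// Consumer: track.write() blocks, so playback paces consumption.
CoroutineScope(Dispatchers.IO).launch {
    for (chunk in channel) {
        track.write(chunk, 0, chunk.size, AudioTrack.WRITE_BLOCKING)
    }
}

// Producer: the TTS engine calls the lambda once per generated chunk.
val audio = TtsEngine.tts!!.generateWithCallback(
    text = "How are you doing today?",
    sid = TtsEngine.speakerId,
    speed = TtsEngine.speed,
) { samples: FloatArray ->
    val copy = samples.copyOf() // copied before handing off, as the app's callback does
    CoroutineScope(Dispatchers.IO).launch { channel.send(copy) }
    1 // 1 = keep generating; return 0 (as after Stop) to end generation early
}
channel.close() // generation finished; the consumer loop then ends
```
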
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
index bc6a22c571..b510f97d39 120000
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
@@ -1 +1 @@
-../../../../../../../../../../../SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
\ No newline at end of file
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
index 480f8a384e..2ae628c271 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx.tts.engine
+import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
@@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
-import PreferenceHelper
object TtsEngine {
var tts: OfflineTts? = null
@@ -41,6 +41,9 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
+ private var acousticModelName: String? = null // for matcha tts
+ private var vocoder: String? = null // for matcha tts
+ private var voices: String? = null // for kokoro
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
@@ -52,8 +55,21 @@ object TtsEngine {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
- modelDir = null
+ //
+ // For VITS -- begin
modelName = null
+ // For VITS -- end
+
+ // For Matcha -- begin
+ acousticModelName = null
+ vocoder = null
+ // For Matcha -- end
+
+ // For Kokoro -- begin
+ voices = null
+ // For Kokoro -- end
+
+ modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
@@ -82,7 +98,6 @@ object TtsEngine {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
- // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
@@ -101,8 +116,57 @@ object TtsEngine {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
- }
+ // Example 6
+ // vits-melo-tts-zh_en
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
+ // modelDir = "vits-melo-tts-zh_en"
+ // modelName = "model.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "vits-melo-tts-zh_en/dict"
+ // lang = "zho"
+
+ // Example 7
+ // matcha-icefall-zh-baker
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+ // modelDir = "matcha-icefall-zh-baker"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "matcha-icefall-zh-baker/dict"
+ // lang = "zho"
+
+ // Example 8
+ // matcha-icefall-en_US-ljspeech
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+ // modelDir = "matcha-icefall-en_US-ljspeech"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
+ // lang = "eng"
+
+ // Example 9
+ // kokoro-en-v0_19
+ // modelDir = "kokoro-en-v0_19"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-en-v0_19/espeak-ng-data"
+ // lang = "eng"
+
+ // Example 10
+ // kokoro-multi-lang-v1_0
+ // modelDir = "kokoro-multi-lang-v1_0"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
+ // dictDir = "kokoro-multi-lang-v1_0/dict"
+ // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
+ // lang = "eng"
+ // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
+ //
+        // This model supports many languages, e.g., English and Chinese.
+ // We set lang to eng here.
+ }
fun createTts(context: Context) {
Log.i(TAG, "Init Next-gen Kaldi TTS")
@@ -115,22 +179,25 @@ object TtsEngine {
assets = context.assets
if (dataDir != null) {
- val newDir = copyDataDir(context, modelDir!!)
- modelDir = "$newDir/$modelDir"
+ val newDir = copyDataDir(context, dataDir!!)
dataDir = "$newDir/$dataDir"
- assets = null
}
if (dictDir != null) {
- val newDir = copyDataDir(context, modelDir!!)
- modelDir = "$newDir/$modelDir"
- dictDir = "$modelDir/dict"
- ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
- assets = null
+ val newDir = copyDataDir(context, dictDir!!)
+ dictDir = "$newDir/$dictDir"
+ if (ruleFsts == null) {
+ ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
+ }
}
val config = getOfflineTtsConfig(
- modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
+ modelDir = modelDir!!,
+ modelName = modelName ?: "",
+ acousticModelName = acousticModelName ?: "",
+ vocoder = vocoder ?: "",
+ voices = voices ?: "",
+ lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml b/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
index ac28473148..67518e0a38 100755
--- a/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
+++ b/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
@@ -1,3 +1,3 @@
- TTS Engine
+ TTS Engine: Next-gen Kaldi
\ No newline at end of file
diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index fb14d072d3..2f65276153 100644
--- a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -19,6 +19,11 @@ import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getFeatureConfig
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
import com.k2fsa.sherpa.onnx.getVadModelConfig
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
import kotlin.concurrent.thread
@@ -166,6 +171,8 @@ class MainActivity : AppCompatActivity() {
val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize)
+ val coroutineScope = CoroutineScope(Dispatchers.IO)
+
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size)
@@ -175,11 +182,15 @@ class MainActivity : AppCompatActivity() {
vad.acceptWaveform(samples)
while(!vad.empty()) {
var segment = vad.front()
- val text = runSecondPass(segment.samples)
-
- if (text.isNotBlank()) {
- lastText = "${lastText}\n${idx}: ${text}"
- idx += 1
+ coroutineScope.launch {
+ val text = runSecondPass(segment.samples)
+ if (text.isNotBlank()) {
+ withContext(Dispatchers.Main) {
+ lastText = "${lastText}\n${idx}: ${text}"
+ idx += 1
+ textView.text = lastText.lowercase()
+ }
+ }
}
vad.pop();
@@ -192,6 +203,9 @@ class MainActivity : AppCompatActivity() {
}
}
}
+
+ // Clean up the coroutine scope when done
+ coroutineScope.cancel()
}
private fun initOfflineRecognizer() {
diff --git a/build-aarch64-linux-gnu.sh b/build-aarch64-linux-gnu.sh
index d9851fbe1b..cdc48e3729 100755
--- a/build-aarch64-linux-gnu.sh
+++ b/build-aarch64-linux-gnu.sh
@@ -1,4 +1,25 @@
#!/usr/bin/env bash
+#
+# Usage of this file
+#
+# (1) Build CPU version of sherpa-onnx
+# ./build-aarch64-linux-gnu.sh
+#
+# (2) Build GPU version of sherpa-onnx
+#
+# (a) Make sure your board has NVIDIA GPU(s)
+#
+# (b) For Jetson Nano B01 (using CUDA 10.2)
+#
+# export SHERPA_ONNX_ENABLE_GPU=ON
+# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
+# ./build-aarch64-linux-gnu.sh
+#
+# (c) For Jetson Orin NX (using CUDA 11.4)
+#
+# export SHERPA_ONNX_ENABLE_GPU=ON
+# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
+# ./build-aarch64-linux-gnu.sh
if command -v aarch64-none-linux-gnu-gcc &> /dev/null; then
ln -svf $(which aarch64-none-linux-gnu-gcc) ./aarch64-linux-gnu-gcc
@@ -44,6 +65,21 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then
BUILD_SHARED_LIBS=OFF
fi
+if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"" ]]; then
+ # By default, use CPU
+ SHERPA_ONNX_ENABLE_GPU=OFF
+fi
+
+if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"ON" ]]; then
+  # Build shared libs if GPU support is enabled.
+ BUILD_SHARED_LIBS=ON
+fi
+
+if [[ x"$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION" == x"" ]]; then
+ # Used only when SHERPA_ONNX_ENABLE_GPU is ON
+ SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION="1.11.0"
+fi
+
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
@@ -51,6 +87,7 @@ cmake \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
+ -DSHERPA_ONNX_ENABLE_GPU=$SHERPA_ONNX_ENABLE_GPU \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
@@ -59,6 +96,7 @@ cmake \
-DSHERPA_ONNX_ENABLE_JNI=OFF \
-DSHERPA_ONNX_ENABLE_C_API=ON \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=ON \
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION \
-DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
..
diff --git a/build-android-arm64-v8a.sh b/build-android-arm64-v8a.sh
index 7967af018e..88ba09ef03 100755
--- a/build-android-arm64-v8a.sh
+++ b/build-android-arm64-v8a.sh
@@ -1,7 +1,26 @@
#!/usr/bin/env bash
set -ex
-dir=$PWD/build-android-arm64-v8a
+# If BUILD_SHARED_LIBS is ON, we use libonnxruntime.so
+# If BUILD_SHARED_LIBS is OFF, we use libonnxruntime.a
+#
+# In any case, we will have libsherpa-onnx-jni.so
+#
+# If BUILD_SHARED_LIBS is OFF, then libonnxruntime.a is linked into libsherpa-onnx-jni.so
+# and you only need to copy libsherpa-onnx-jni.so to your Android projects.
+#
+# If BUILD_SHARED_LIBS is ON, then you need to copy both libsherpa-onnx-jni.so
+# and libonnxruntime.so to your Android projects
+#
+if [ -z $BUILD_SHARED_LIBS ]; then
+ BUILD_SHARED_LIBS=ON
+fi
+
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ dir=$PWD/build-android-arm64-v8a
+else
+ dir=$PWD/build-android-arm64-v8a-static
+fi
mkdir -p $dir
cd $dir
@@ -21,6 +40,9 @@ cd $dir
if [ -z $ANDROID_NDK ]; then
ANDROID_NDK=/star-fj/fangjun/software/android-sdk/ndk/22.1.7171670
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/star-fj/fangjun/software/android-sdk/ndk/27.0.11718014
+ fi
# or use
# ANDROID_NDK=/star-fj/fangjun/software/android-ndk
#
@@ -32,6 +54,10 @@ if [ -z $ANDROID_NDK ]; then
# Tools -> SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -44,17 +70,29 @@ echo "ANDROID_NDK: $ANDROID_NDK"
sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/arm64-v8a/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/arm64-v8a/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -88,24 +126,46 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_PLATFORM=android-21 ..
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# Please use -DANDROID_PLATFORM=android-27 if you want to use Android NNAPI
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so install/lib
+cp -fv $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md < SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -45,17 +71,29 @@ sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/armeabi-v7a/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/armeabi-v7a/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -89,18 +127,42 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \
-DANDROID_PLATFORM=android-21 ..
+
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so install/lib
+cp -fv $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md < SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -45,17 +71,29 @@ sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/x86_64/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/x86_64/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-x86_64-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -89,20 +127,44 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="x86_64" \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DANDROID_PLATFORM=android-21 ..
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/x86_64/libonnxruntime.so install/lib
+
+cp -fv $onnxruntime_version/jni/x86_64/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md <install/lib/README.md <$dst/Info.plist <
+<?xml version="1.0" encoding="UTF-8"?>
+<plist version="1.0">
+<dict>
+  <key>CFBundleName</key>
+  <string>sherpa_onnx</string>
+  <key>DTSDKName</key>
+  <string>iphoneos17.0</string>
+  <key>DTXcode</key>
+  <string>1501</string>
+  <key>DTSDKBuild</key>
+  <string>21A326</string>
+  <key>CFBundleDevelopmentRegion</key>
+  <string>en</string>
+  <key>CFBundleVersion</key>
+  <string>1</string>
+  <key>BuildMachineOSBuild</key>
+  <string>23B81</string>
+  <key>DTPlatformName</key>
+  <string>iphoneos</string>
+  <key>CFBundlePackageType</key>
+  <string>FMWK</string>
+  <key>CFBundleShortVersionString</key>
+  <string>1.10.42</string>
+  <key>CFBundleSupportedPlatforms</key>
+  <array>
+    <string>iPhoneOS</string>
+  </array>
+  <key>CFBundleInfoDictionaryVersion</key>
+  <string>6.0</string>
+  <key>CFBundleExecutable</key>
+  <string>sherpa_onnx</string>
+  <key>DTCompiler</key>
+  <string>com.apple.compilers.llvm.clang.1_0</string>
+  <key>UIRequiredDeviceCapabilities</key>
+  <array>
+    <string>arm64</string>
+  </array>
+  <key>MinimumOSVersion</key>
+  <string>13.0</string>
+  <key>CFBundleIdentifier</key>
+  <string>com.k2fsa.sherpa.onnx</string>
+  <key>UIDeviceFamily</key>
+  <array>
+    <integer>1</integer>
+    <integer>2</integer>
+  </array>
+  <key>CFBundleSignature</key>
+  <string>????</string>
+  <key>DTPlatformVersion</key>
+  <string>17.0</string>
+  <key>DTXcodeBuild</key>
+  <string>15A507</string>
+  <key>DTPlatformBuild</key>
+  <string>21A326</string>
+  <key>SupportedArchitectures</key>
+  <array>
+    <string>arm64</string>
+    <string>x86_64</string>
+  </array>
+  <key>SupportedPlatform</key>
+  <string>ios</string>
+</dict>
+</plist>
+EOF
+done
+
+rm -rf sherpa_onnx.xcframework
+xcodebuild -create-xcframework \
+ -framework ios-arm64/sherpa_onnx.framework \
+ -framework ios-arm64_x86_64-simulator/sherpa_onnx.framework \
+ -output sherpa_onnx.xcframework
+
+cd sherpa_onnx.xcframework
+echo "PWD: $PWD"
+ls -lh
+echo "---"
+ls -lh */*
diff --git a/build-ohos-arm64-v8a.sh b/build-ohos-arm64-v8a.sh
new file mode 100755
index 0000000000..4e0ecbb299
--- /dev/null
+++ b/build-ohos-arm64-v8a.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-arm64-v8a
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/Users/fangjun/software/command-line-tools/sdk/default/openharmony/native
+ # (py38) fangjuns-MacBook-Pro:software fangjun$ ls -lh command-line-tools/sdk/default/openharmony/native/
+ # total 752
+ # -rw-r--r-- 1 fangjun staff 341K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 build
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:18 build-tools
+ # -rw-r--r-- 1 fangjun staff 371B Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 10 fangjun staff 320B Nov 6 21:18 llvm
+ # -rw-r--r-- 1 fangjun staff 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 fangjun staff 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 fangjun staff 167B Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/aarch64-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/aarch64-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-arm64-v8a-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+cmake \
+ -DOHOS_ARCH=arm64-v8a \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
+
+d=../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/arm64-v8a
+if [ -d $d ]; then
+ cp -v install/lib/libsherpa-onnx-c-api.so $d/
+ cp -v install/lib/libonnxruntime.so $d/
+fi
diff --git a/build-ohos-armeabi-v7a.sh b/build-ohos-armeabi-v7a.sh
new file mode 100755
index 0000000000..e0a2ac8839
--- /dev/null
+++ b/build-ohos-armeabi-v7a.sh
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-armeabi-v7a
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/armv7-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/armv7-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-armeabi-v7a-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+# See https://github.com/llvm/llvm-project/issues/57732
+# we need to use -mfloat-abi=hard
+cmake \
+ -DOHOS_ARCH=armeabi-v7a \
+ -DCMAKE_CXX_FLAGS="-mfloat-abi=hard" \
+ -DCMAKE_C_FLAGS="-mfloat-abi=hard" \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
diff --git a/build-ohos-x86-64.sh b/build-ohos-x86-64.sh
new file mode 100755
index 0000000000..9584edafc3
--- /dev/null
+++ b/build-ohos-x86-64.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-x86-64
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/Users/fangjun/software/command-line-tools/sdk/default/openharmony/native
+ # (py38) fangjuns-MacBook-Pro:software fangjun$ ls -lh command-line-tools/sdk/default/openharmony/native/
+ # total 752
+ # -rw-r--r-- 1 fangjun staff 341K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 build
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:18 build-tools
+ # -rw-r--r-- 1 fangjun staff 371B Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 10 fangjun staff 320B Nov 6 21:18 llvm
+ # -rw-r--r-- 1 fangjun staff 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 fangjun staff 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 fangjun staff 167B Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/x86_64-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/x86_64-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-x86_64-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+cmake \
+ -DOHOS_ARCH=x86_64 \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
+
+d=../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/x86_64
+if [ -d $d ]; then
+ cp -v install/lib/libsherpa-onnx-c-api.so $d/
+ cp -v install/lib/libonnxruntime.so $d/
+fi
diff --git a/build-swift-macos.sh b/build-swift-macos.sh
index f41dd7d5cb..359ea93714 100755
--- a/build-swift-macos.sh
+++ b/build-swift-macos.sh
@@ -7,6 +7,9 @@ mkdir -p $dir
cd $dir
cmake \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
@@ -21,6 +24,7 @@ cmake \
make VERBOSE=1 -j4
make install
+rm -fv ./install/include/cargs.h
libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libsherpa-onnx-c-api.a \
@@ -34,3 +38,8 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libpiper_phonemize.a \
./install/lib/libespeak-ng.a \
./install/lib/libssentencepiece_core.a
+
+xcodebuild -create-xcframework \
+ -library install/lib/libsherpa-onnx.a \
+ -headers install/include \
+ -output sherpa-onnx.xcframework
diff --git a/build-wasm-simd-asr.sh b/build-wasm-simd-asr.sh
index eda18f74d7..c195393325 100755
--- a/build-wasm-simd-asr.sh
+++ b/build-wasm-simd-asr.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-kws.sh b/build-wasm-simd-kws.sh
index 6fdf8218f3..408fd75a8b 100755
--- a/build-wasm-simd-kws.sh
+++ b/build-wasm-simd-kws.sh
@@ -9,12 +9,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-nodejs.sh b/build-wasm-simd-nodejs.sh
index 3ad88d5d4c..43023cbedb 100755
--- a/build-wasm-simd-nodejs.sh
+++ b/build-wasm-simd-nodejs.sh
@@ -16,12 +16,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-speaker-diarization.sh b/build-wasm-simd-speaker-diarization.sh
new file mode 100755
index 0000000000..888abb566e
--- /dev/null
+++ b/build-wasm-simd-speaker-diarization.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# Copyright (c) 2024 Xiaomi Corporation
+#
+# This script builds sherpa-onnx for WebAssembly (speaker diarization)
+
+set -ex
+
+if [ x"$EMSCRIPTEN" == x"" ]; then
+ if ! command -v emcc &> /dev/null; then
+ echo "Please install emscripten first"
+ echo ""
+ echo "You can use the following commands to install it:"
+ echo ""
+ echo "git clone https://github.com/emscripten-core/emsdk.git"
+ echo "cd emsdk"
+ echo "git pull"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
+ echo "source ./emsdk_env.sh"
+ exit 1
+ else
+ EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
+ fi
+fi
+
+export EMSCRIPTEN=$EMSCRIPTEN
+echo "EMSCRIPTEN: $EMSCRIPTEN"
+if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
+ echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
+ echo "Please make sure you have installed emsdk correctly"
+ exit 1
+fi
+
+mkdir -p build-wasm-simd-speaker-diarization
+pushd build-wasm-simd-speaker-diarization
+
+export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
+
+cmake \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
+ \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DBUILD_SHARED_LIBS=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+ -DSHERPA_ONNX_ENABLE_GPU=OFF \
+ -DSHERPA_ONNX_ENABLE_WASM=ON \
+ -DSHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION=ON \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
+ ..
+make -j2
+make install
+
+ls -lh install/bin/wasm/speaker-diarization
diff --git a/build-wasm-simd-tts.sh b/build-wasm-simd-tts.sh
index 6835e4c433..c707bef6e3 100755
--- a/build-wasm-simd-tts.sh
+++ b/build-wasm-simd-tts.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-vad-asr.sh b/build-wasm-simd-vad-asr.sh
index 5d15cf6519..6219315500 100755
--- a/build-wasm-simd-vad-asr.sh
+++ b/build-wasm-simd-vad-asr.sh
@@ -15,12 +15,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-vad.sh b/build-wasm-simd-vad.sh
index c74f57d373..2ab11249db 100755
--- a/build-wasm-simd-vad.sh
+++ b/build-wasm-simd-vad.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt
index 0bf5264503..44c5814a53 100644
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -4,9 +4,29 @@ include_directories(${CMAKE_SOURCE_DIR})
add_executable(decode-file-c-api decode-file-c-api.c)
target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
+add_executable(kws-c-api kws-c-api.c)
+target_link_libraries(kws-c-api sherpa-onnx-c-api)
+
if(SHERPA_ONNX_ENABLE_TTS)
add_executable(offline-tts-c-api offline-tts-c-api.c)
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
+
+ add_executable(matcha-tts-zh-c-api matcha-tts-zh-c-api.c)
+ target_link_libraries(matcha-tts-zh-c-api sherpa-onnx-c-api)
+
+ add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c)
+ target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api)
+
+ add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
+ target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
+
+ add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
+ target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)
+endif()
+
+if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
+ add_executable(offline-speaker-diarization-c-api offline-speaker-diarization-c-api.c)
+ target_link_libraries(offline-speaker-diarization-c-api sherpa-onnx-c-api)
endif()
add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
@@ -30,6 +50,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
add_executable(sense-voice-c-api sense-voice-c-api.c)
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
+add_executable(moonshine-c-api moonshine-c-api.c)
+target_link_libraries(moonshine-c-api sherpa-onnx-c-api)
+
add_executable(zipformer-c-api zipformer-c-api.c)
target_link_libraries(zipformer-c-api sherpa-onnx-c-api)
@@ -48,6 +71,12 @@ target_link_libraries(telespeech-c-api sherpa-onnx-c-api)
add_executable(vad-sense-voice-c-api vad-sense-voice-c-api.c)
target_link_libraries(vad-sense-voice-c-api sherpa-onnx-c-api)
+add_executable(vad-whisper-c-api vad-whisper-c-api.c)
+target_link_libraries(vad-whisper-c-api sherpa-onnx-c-api)
+
+add_executable(vad-moonshine-c-api vad-moonshine-c-api.c)
+target_link_libraries(vad-moonshine-c-api sherpa-onnx-c-api)
+
add_executable(streaming-zipformer-buffered-tokens-hotwords-c-api
streaming-zipformer-buffered-tokens-hotwords-c-api.c)
target_link_libraries(streaming-zipformer-buffered-tokens-hotwords-c-api sherpa-onnx-c-api)
diff --git a/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c b/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
index ec8be3b075..45a0bb87a0 100644
--- a/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
+++ b/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
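The only change here is the `(void *)` cast on the fread destination: `buffer_out` is declared as `const char **` for callers, so the freshly malloc'ed buffer has to be cast back to a writable pointer before `fread` can fill it. A compressed sketch of the same pattern (names are illustrative, not from the patch):

```c
// Illustrative sketch of the ReadFile pattern: expose the buffer as const to
// callers, but cast away that const when fread() needs to write into it.
#include <stdio.h>
#include <stdlib.h>

static size_t ReadAll(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "rb");
  if (!file) return 0;

  fseek(file, 0, SEEK_END);
  long size = ftell(file);
  rewind(file);

  *buffer_out = malloc((size_t)size);
  if (!*buffer_out) {
    fclose(file);
    return 0;
  }

  // fread() takes void *, so drop the const that exists only for callers.
  size_t read_bytes = fread((void *)*buffer_out, 1, (size_t)size, file);
  fclose(file);
  return read_bytes;
}
```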
diff --git a/c-api-examples/kokoro-tts-en-c-api.c b/c-api-examples/kokoro-tts-en-c-api.c
new file mode 100644
index 0000000000..44e6c28d89
--- /dev/null
+++ b/c-api-examples/kokoro-tts-en-c-api.c
@@ -0,0 +1,84 @@
+// c-api-examples/kokoro-tts-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+./kokoro-tts-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
+ config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-kokoro-en.wav";
+ const char *text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ int32_t sid = 0;
+  float speed = 1.0;  // larger value -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
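`ProgressCallback` above always returns 1. As its comments note, returning 0 stops generation; a hypothetical variation (not part of this patch) that caps the output length could look like the sketch below. The 24 kHz rate is an assumption for the Kokoro model; check `audio->sample_rate` for the real value.

```c
// Hypothetical callback: stop after roughly ten seconds of audio.
// Assumes ~24 kHz output; returning 0 asks the generator to stop early.
#include <stdint.h>
#include <stdio.h>

static int32_t g_generated_samples = 0;

static int32_t StopAfterTenSeconds(const float *samples, int32_t num_samples,
                                   float progress) {
  (void)samples;
  g_generated_samples += num_samples;
  fprintf(stderr, "Progress: %.3f%%, samples so far: %d\n", progress * 100,
          g_generated_samples);
  return g_generated_samples < 10 * 24000 ? 1 : 0;  // 0 => stop generating
}
```

Pass it in place of `ProgressCallback` when calling `SherpaOnnxOfflineTtsGenerateWithProgressCallback`.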
diff --git a/c-api-examples/kokoro-tts-zh-en-c-api.c b/c-api-examples/kokoro-tts-zh-en-c-api.c
new file mode 100644
index 0000000000..4d998fb71d
--- /dev/null
+++ b/c-api-examples/kokoro-tts-zh-en-c-api.c
@@ -0,0 +1,82 @@
+// c-api-examples/kokoro-tts-zh-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English + Chinese TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+./kokoro-tts-zh-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
+ config.model.kokoro.lexicon =
+ "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
+ "lexicon-zh.txt";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-kokoro-zh-en.wav";
+ const char *text =
+ "中英文语音合成测试。This is generated by next generation Kaldi using "
+ "Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0; // there are 53 speakers
+  float speed = 1.0;  // larger value -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
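Per the comment above, kokoro-multi-lang-v1_0 has 53 speakers. To audition them, a small sketch (not part of this patch) can reuse the `tts` handle and render the same text once per speaker id, using only the calls already shown in this example:

```c
// Sketch: write one wav per speaker id, reusing a tts handle created as above.
#include <stdio.h>

#include "sherpa-onnx/c-api/c-api.h"

static void GenerateAllSpeakers(const SherpaOnnxOfflineTts *tts,
                                const char *text, int32_t num_speakers) {
  char filename[64];
  for (int32_t sid = 0; sid < num_speakers; ++sid) {
    const SherpaOnnxGeneratedAudio *audio =
        SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);

    snprintf(filename, sizeof(filename), "./generated-kokoro-sid-%d.wav", sid);
    SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  }
}

// Usage: GenerateAllSpeakers(tts, text, 53);
```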
diff --git a/c-api-examples/kws-c-api.c b/c-api-examples/kws-c-api.c
new file mode 100644
index 0000000000..8909809f43
--- /dev/null
+++ b/c-api-examples/kws-c-api.c
@@ -0,0 +1,152 @@
+// c-api-examples/kws-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file demonstrates how to use the keyword spotter with sherpa-onnx's C API.
+// clang-format off
+//
+// Usage
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+//
+// ./kws-c-api
+//
+// clang-format on
+#include <stdio.h>
+#include <stdlib.h>  // exit
+#include <string.h>  // memset
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ SherpaOnnxKeywordSpotterConfig config;
+
+ memset(&config, 0, sizeof(config));
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "tokens.txt";
+
+ config.model_config.provider = "cpu";
+ config.model_config.num_threads = 1;
+ config.model_config.debug = 1;
+
+ config.keywords_file =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/test_keywords.txt";
+
+ const SherpaOnnxKeywordSpotter *kws = SherpaOnnxCreateKeywordSpotter(&config);
+ if (!kws) {
+ fprintf(stderr, "Please check your config");
+ exit(-1);
+ }
+
+ fprintf(stderr,
+ "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n");
+
+ const char *wav_filename =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/3.wav";
+
+ float tail_paddings[8000] = {0}; // 0.5 seconds
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ exit(-1);
+ }
+
+ const SherpaOnnxOnlineStream *stream = SherpaOnnxCreateKeywordStream(kws);
+ if (!stream) {
+ fprintf(stderr, "Failed to create stream\n");
+ exit(-1);
+ }
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ // --------------------------------------------------------------------------
+
+ fprintf(stderr, "--Use pre-defined keywords + add a new keyword--\n");
+
+ stream = SherpaOnnxCreateKeywordStreamWithKeywords(kws, "y ǎn y uán @演员");
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ // --------------------------------------------------------------------------
+
+ fprintf(stderr, "--Use pre-defined keywords + add two new keywords--\n");
+
+ stream = SherpaOnnxCreateKeywordStreamWithKeywords(
+ kws, "y ǎn y uán @演员/zh ī m íng @知名");
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyKeywordSpotter(kws);
+
+ return 0;
+}
diff --git a/c-api-examples/matcha-tts-en-c-api.c b/c-api-examples/matcha-tts-en-c-api.c
new file mode 100644
index 0000000000..99b0a9742d
--- /dev/null
+++ b/c-api-examples/matcha-tts-en-c-api.c
@@ -0,0 +1,87 @@
+// c-api-examples/matcha-tts-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
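+
+// A hypothetical variant (an assumption, not part of this example): with the
+// same callback signature, generation can be stopped early, e.g.
+//
+//   static int32_t StopHalfWay(const float *samples, int32_t num_samples,
+//                              float progress) {
+//     return progress < 0.5 ? 1 : 0;  // stop once progress reaches 50%
+//   }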
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
+
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+
+ config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
+
+ config.model.matcha.data_dir =
+ "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
+
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-matcha-en.wav";
+ const char *text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0;
+  float speed = 1.0;  // larger -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
diff --git a/c-api-examples/matcha-tts-zh-c-api.c b/c-api-examples/matcha-tts-zh-c-api.c
new file mode 100644
index 0000000000..9fb9f4597d
--- /dev/null
+++ b/c-api-examples/matcha-tts-zh-c-api.c
@@ -0,0 +1,87 @@
+// c-api-examples/matcha-tts-zh-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for Chinese TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-zh-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-zh-baker/model-steps-3.onnx";
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+ config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
+ config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
+ config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
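+  // rule_fsts applies text-normalization FSTs before synthesis; the names
+  // below suggest they rewrite phone numbers, dates, and plain numbers in
+  // the input text (exercised by the test sentence further below).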
+ // clang-format off
+ config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
+ // clang-format on
+
+ const char *filename = "./generated-matcha-zh.wav";
+ const char *text =
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
+ "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
+ "受着生命的奇迹与温柔."
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
+ "经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0;
+  float speed = 1.0;  // larger -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
diff --git a/c-api-examples/moonshine-c-api.c b/c-api-examples/moonshine-c-api.c
new file mode 100644
index 0000000000..775dd24c98
--- /dev/null
+++ b/c-api-examples/moonshine-c-api.c
@@ -0,0 +1,83 @@
+// c-api-examples/moonshine-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use Moonshine tiny with sherpa-onnx's C API.
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename =
+ "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
+ const char *preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ const char *uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ const char *cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 1;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.moonshine.preprocessor = preprocessor;
+ offline_model_config.moonshine.encoder = encoder;
+ offline_model_config.moonshine.uncached_decoder = uncached_decoder;
+ offline_model_config.moonshine.cached_decoder = cached_decoder;
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ fprintf(stderr, "Decoded text: %s\n", result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/offline-speaker-diarization-c-api.c b/c-api-examples/offline-speaker-diarization-c-api.c
new file mode 100644
index 0000000000..d5a17dd0b7
--- /dev/null
+++ b/c-api-examples/offline-speaker-diarization-c-api.c
@@ -0,0 +1,131 @@
+// c-api-examples/offline-speaker-diarization-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to implement speaker diarization with
+// sherpa-onnx's C API.
+
+// clang-format off
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+
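+  ./offline-speaker-diarization-c-api
+
+  (The binary name above is an assumption; it follows the naming of the
+  other examples in this directory.)
+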
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(int32_t num_processed_chunks,
+ int32_t num_total_chunks, void *arg) {
+ float progress = 100.0 * num_processed_chunks / num_total_chunks;
+ fprintf(stderr, "progress %.2f%%\n", progress);
+
+ // the return value is currently ignored
+ return 0;
+}
+
+int main() {
+ // Please see the comments at the start of this file for how to download
+ // the .onnx file and .wav files below
+ const char *segmentation_model =
+ "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+
+ const char *embedding_extractor_model =
+ "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ const char *wav_filename = "./0-four-speakers-zh.wav";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ SherpaOnnxOfflineSpeakerDiarizationConfig config;
+ memset(&config, 0, sizeof(config));
+
+ config.segmentation.pyannote.model = segmentation_model;
+ config.embedding.model = embedding_extractor_model;
+
+ // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
+ // we set num_clusters to 4
+ //
+ config.clustering.num_clusters = 4;
+ // If you don't know the number of speakers in the test wave file, please
+ // use
+ // config.clustering.threshold = 0.5; // You need to tune this threshold
+
+ const SherpaOnnxOfflineSpeakerDiarization *sd =
+ SherpaOnnxCreateOfflineSpeakerDiarization(&config);
+
+ if (!sd) {
+ fprintf(stderr, "Failed to initialize offline speaker diarization\n");
+ return -1;
+ }
+
+ if (SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd) !=
+ wave->sample_rate) {
+ fprintf(
+ stderr,
+ "Expected sample rate: %d. Actual sample rate from the wave file: %d\n",
+ SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd),
+ wave->sample_rate);
+    SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+    SherpaOnnxFreeWave(wave);
+    return -1;
+ }
+
+ const SherpaOnnxOfflineSpeakerDiarizationResult *result =
+ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
+ sd, wave->samples, wave->num_samples, ProgressCallback, NULL);
+  if (!result) {
+    fprintf(stderr, "Failed to do speaker diarization\n");
+    SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+    SherpaOnnxFreeWave(wave);
+    return -1;
+  }
+
+ int32_t num_segments =
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
+
+ const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
+ SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);
+
+ for (int32_t i = 0; i != num_segments; ++i) {
+ fprintf(stderr, "%.3f -- %.3f speaker_%02d\n", segments[i].start,
+ segments[i].end, segments[i].speaker);
+ }
+
+ SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
+ SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
+ SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/offline-tts-c-api.c b/c-api-examples/offline-tts-c-api.c
index 7fbdb004ca..eaa25af392 100644
--- a/c-api-examples/offline-tts-c-api.c
+++ b/c-api-examples/offline-tts-c-api.c
@@ -229,7 +229,7 @@ int32_t main(int32_t argc, char *argv[]) {
ShowUsage();
}
- SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);
diff --git a/c-api-examples/paraformer-c-api.c b/c-api-examples/paraformer-c-api.c
index 345aed5557..98d38c7898 100644
--- a/c-api-examples/paraformer-c-api.c
+++ b/c-api-examples/paraformer-c-api.c
@@ -54,7 +54,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -63,7 +63,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/sense-voice-c-api.c b/c-api-examples/sense-voice-c-api.c
index 06e890636e..25d58219e7 100644
--- a/c-api-examples/sense-voice-c-api.c
+++ b/c-api-examples/sense-voice-c-api.c
@@ -56,7 +56,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -65,7 +65,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/streaming-ctc-buffered-tokens-c-api.c b/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
index 3223772a87..98f5b4a60a 100644
--- a/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
+++ b/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -95,7 +95,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -107,7 +107,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c b/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
index cd87177b58..0c382cc941 100644
--- a/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
+++ b/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -96,7 +96,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -108,7 +108,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-paraformer-c-api.c b/c-api-examples/streaming-paraformer-c-api.c
index b54116f083..384ea411b1 100644
--- a/c-api-examples/streaming-paraformer-c-api.c
+++ b/c-api-examples/streaming-paraformer-c-api.c
@@ -57,7 +57,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -66,7 +66,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c b/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
index d5092c5cc2..bd76ea8abb 100644
--- a/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
+++ b/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -116,7 +116,7 @@ int32_t main() {
recognizer_config.hotwords_buf = hotwords_buf;
recognizer_config.hotwords_buf_size = hotwords_buf_size;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -130,7 +130,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-zipformer-c-api.c b/c-api-examples/streaming-zipformer-c-api.c
index e1417639d9..6011186ea1 100644
--- a/c-api-examples/streaming-zipformer-c-api.c
+++ b/c-api-examples/streaming-zipformer-c-api.c
@@ -63,7 +63,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -72,7 +72,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/telespeech-c-api.c b/c-api-examples/telespeech-c-api.c
index fa7824c3be..9bf34b1a87 100644
--- a/c-api-examples/telespeech-c-api.c
+++ b/c-api-examples/telespeech-c-api.c
@@ -49,7 +49,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -58,7 +58,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/vad-moonshine-c-api.c b/c-api-examples/vad-moonshine-c-api.c
new file mode 100644
index 0000000000..2ad6f6d631
--- /dev/null
+++ b/c-api-examples/vad-moonshine-c-api.c
@@ -0,0 +1,146 @@
+// c-api-examples/vad-moonshine-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename = "./Obama.wav";
+ const char *vad_filename = "./silero_vad.onnx";
+
+ const char *preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ const char *uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ const char *cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ if (wave->sample_rate != 16000) {
+ fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
+ wave->sample_rate);
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 0;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.moonshine.preprocessor = preprocessor;
+ offline_model_config.moonshine.encoder = encoder;
+ offline_model_config.moonshine.uncached_decoder = uncached_decoder;
+ offline_model_config.moonshine.cached_decoder = cached_decoder;
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ SherpaOnnxVadModelConfig vadConfig;
+ memset(&vadConfig, 0, sizeof(vadConfig));
+ vadConfig.silero_vad.model = vad_filename;
+ vadConfig.silero_vad.threshold = 0.5;
+ vadConfig.silero_vad.min_silence_duration = 0.5;
+ vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 10;
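+  // 512 samples at 16 kHz correspond to a 32 ms VAD window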
+ vadConfig.silero_vad.window_size = 512;
+ vadConfig.sample_rate = 16000;
+ vadConfig.num_threads = 1;
+ vadConfig.debug = 1;
+
+ SherpaOnnxVoiceActivityDetector *vad =
+ SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
+
+ if (vad == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ return -1;
+ }
+
+ int32_t window_size = vadConfig.silero_vad.window_size;
+ int32_t i = 0;
+ int is_eof = 0;
+
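+  // Feed the waveform to the VAD in window_size chunks. Whenever the VAD
+  // emits a complete speech segment, decode it with Moonshine on a fresh
+  // offline stream and print the text with its time range. The flush call
+  // in the else branch forces out any segment still pending at end of input.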
+ while (!is_eof) {
+ if (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ } else {
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+ is_eof = 1;
+ }
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
+ segment->samples, segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+ i += window_size;
+ }
+
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxDestroyVoiceActivityDetector(vad);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/vad-sense-voice-c-api.c b/c-api-examples/vad-sense-voice-c-api.c
index 172ec0a799..eeddfce883 100644
--- a/c-api-examples/vad-sense-voice-c-api.c
+++ b/c-api-examples/vad-sense-voice-c-api.c
@@ -66,7 +66,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -81,6 +81,7 @@ int32_t main() {
vadConfig.silero_vad.threshold = 0.5;
vadConfig.silero_vad.min_silence_duration = 0.5;
vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 5;
vadConfig.silero_vad.window_size = 512;
vadConfig.sample_rate = 16000;
vadConfig.num_threads = 1;
@@ -98,18 +99,24 @@ int32_t main() {
int32_t window_size = vadConfig.silero_vad.window_size;
int32_t i = 0;
-
- while (i + window_size < wave->num_samples) {
- SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
- window_size);
- i += window_size;
+ int is_eof = 0;
+
+ while (!is_eof) {
+ if (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ } else {
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+ is_eof = 1;
+ }
while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
const SherpaOnnxSpeechSegment *segment =
SherpaOnnxVoiceActivityDetectorFront(vad);
- SherpaOnnxOfflineStream *stream =
+ const SherpaOnnxOfflineStream *stream =
SherpaOnnxCreateOfflineStream(recognizer);
+
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
segment->samples, segment->n);
@@ -130,34 +137,7 @@ int32_t main() {
SherpaOnnxDestroySpeechSegment(segment);
SherpaOnnxVoiceActivityDetectorPop(vad);
}
- }
-
- SherpaOnnxVoiceActivityDetectorFlush(vad);
-
- while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
- const SherpaOnnxSpeechSegment *segment =
- SherpaOnnxVoiceActivityDetectorFront(vad);
-
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
- SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples,
- segment->n);
-
- SherpaOnnxDecodeOfflineStream(recognizer, stream);
-
- const SherpaOnnxOfflineRecognizerResult *result =
- SherpaOnnxGetOfflineStreamResult(stream);
-
- float start = segment->start / 16000.0f;
- float duration = segment->n / 16000.0f;
- float stop = start + duration;
-
- fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
-
- SherpaOnnxDestroyOfflineRecognizerResult(result);
- SherpaOnnxDestroyOfflineStream(stream);
-
- SherpaOnnxDestroySpeechSegment(segment);
- SherpaOnnxVoiceActivityDetectorPop(vad);
+ i += window_size;
}
SherpaOnnxDestroyOfflineRecognizer(recognizer);
diff --git a/c-api-examples/vad-whisper-c-api.c b/c-api-examples/vad-whisper-c-api.c
new file mode 100644
index 0000000000..169b4ef126
--- /dev/null
+++ b/c-api-examples/vad-whisper-c-api.c
@@ -0,0 +1,169 @@
+// c-api-examples/vad-whisper-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use VAD + Whisper tiny.en with
+// sherpa-onnx's C API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+// rm sherpa-onnx-whisper-tiny.en.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename = "./Obama.wav";
+ const char *vad_filename = "./silero_vad.onnx";
+
+ const char *encoder = "sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
+ const char *decoder = "sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
+ const char *tokens = "sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ if (wave->sample_rate != 16000) {
+ fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
+ wave->sample_rate);
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 0;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.whisper.encoder = encoder;
+ offline_model_config.whisper.decoder = decoder;
+ offline_model_config.whisper.language = "en";
+ offline_model_config.whisper.tail_paddings = 0;
+ offline_model_config.whisper.task = "transcribe";
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ SherpaOnnxVadModelConfig vadConfig;
+ memset(&vadConfig, 0, sizeof(vadConfig));
+ vadConfig.silero_vad.model = vad_filename;
+ vadConfig.silero_vad.threshold = 0.5;
+ vadConfig.silero_vad.min_silence_duration = 0.5;
+ vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 10;
+ vadConfig.silero_vad.window_size = 512;
+ vadConfig.sample_rate = 16000;
+ vadConfig.num_threads = 1;
+ vadConfig.debug = 1;
+
+ SherpaOnnxVoiceActivityDetector *vad =
+ SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
+
+ if (vad == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ return -1;
+ }
+
+ int32_t window_size = vadConfig.silero_vad.window_size;
+ int32_t i = 0;
+
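+  // Feed the waveform to the VAD window by window and decode each detected
+  // speech segment with Whisper as soon as it becomes available. The flush
+  // call and the second loop below handle segments emitted only at the end
+  // of the input.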
+ while (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ i += window_size;
+
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
+ segment->samples, segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+ }
+
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples,
+ segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxDestroyVoiceActivityDetector(vad);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/whisper-c-api.c b/c-api-examples/whisper-c-api.c
index 3a71bcb030..2e795b0253 100644
--- a/c-api-examples/whisper-c-api.c
+++ b/c-api-examples/whisper-c-api.c
@@ -58,7 +58,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/zipformer-c-api.c b/c-api-examples/zipformer-c-api.c
index 35393b19c5..4db22fc38e 100644
--- a/c-api-examples/zipformer-c-api.c
+++ b/c-api-examples/zipformer-c-api.c
@@ -60,7 +60,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/cmake/asio.cmake b/cmake/asio.cmake
index eaa262acbe..9e3ce8d235 100644
--- a/cmake/asio.cmake
+++ b/cmake/asio.cmake
@@ -2,7 +2,7 @@ function(download_asio)
include(FetchContent)
set(asio_URL "https://github.com/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz")
- set(asio_URL2 "https://hub.nuaa.cf/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz")
+ set(asio_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/asio-asio-1-24-0.tar.gz")
set(asio_HASH "SHA256=cbcaaba0f66722787b1a7c33afe1befb3a012b5af3ad7da7ff0f6b8c9b7a8a5b")
# If you don't have access to the Internet,
diff --git a/cmake/cargs.cmake b/cmake/cargs.cmake
index 54487a6f0c..d7c6055087 100644
--- a/cmake/cargs.cmake
+++ b/cmake/cargs.cmake
@@ -2,7 +2,7 @@ function(download_cargs)
include(FetchContent)
set(cargs_URL "https://github.com/likle/cargs/archive/refs/tags/v1.0.3.tar.gz")
- set(cargs_URL2 "https://hub.nuaa.cf/likle/cargs/archive/refs/tags/v1.0.3.tar.gz")
+ set(cargs_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/cargs-1.0.3.tar.gz")
set(cargs_HASH "SHA256=ddba25bd35e9c6c75bc706c126001b8ce8e084d40ef37050e6aa6963e836eb8b")
# If you don't have access to the Internet,
diff --git a/cmake/cmake_extension.py b/cmake/cmake_extension.py
index 672e3d17ac..3d0dbef8e8 100644
--- a/cmake/cmake_extension.py
+++ b/cmake/cmake_extension.py
@@ -55,6 +55,7 @@ def get_binaries():
"sherpa-onnx-offline-audio-tagging",
"sherpa-onnx-offline-language-identification",
"sherpa-onnx-offline-punctuation",
+ "sherpa-onnx-offline-speaker-diarization",
"sherpa-onnx-offline-tts",
"sherpa-onnx-offline-tts-play",
"sherpa-onnx-offline-websocket-server",
@@ -79,6 +80,7 @@ def get_binaries():
binaries += [
"onnxruntime.dll",
"sherpa-onnx-c-api.dll",
+ "sherpa-onnx-cxx-api.dll",
]
return binaries
diff --git a/cmake/cppjieba.cmake b/cmake/cppjieba.cmake
index 9ad27d7b51..167da338f4 100644
--- a/cmake/cppjieba.cmake
+++ b/cmake/cppjieba.cmake
@@ -2,7 +2,7 @@ function(download_cppjieba)
include(FetchContent)
set(cppjieba_URL "https://github.com/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
- set(cppjieba_URL2 "https://hub.nuaa.cf/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
+ set(cppjieba_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/cppjieba-sherpa-onnx-2024-04-19.tar.gz")
set(cppjieba_HASH "SHA256=03e5264687f0efaef05487a07d49c3f4c0f743347bfbf825df4b30cc75ac5288")
# If you don't have access to the Internet,
diff --git a/cmake/eigen.cmake b/cmake/eigen.cmake
index 154cdd4c2b..9aef9abc88 100644
--- a/cmake/eigen.cmake
+++ b/cmake/eigen.cmake
@@ -2,7 +2,7 @@ function(download_eigen)
include(FetchContent)
set(eigen_URL "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz")
- set(eigen_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/eigen-3.4.0.tar.gz")
+ set(eigen_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/eigen-3.4.0.tar.gz")
set(eigen_HASH "SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72")
# If you don't have access to the Internet,
diff --git a/cmake/espeak-ng-for-piper.cmake b/cmake/espeak-ng-for-piper.cmake
index b54a0a6bd3..0ef8253060 100644
--- a/cmake/espeak-ng-for-piper.cmake
+++ b/cmake/espeak-ng-for-piper.cmake
@@ -2,7 +2,7 @@ function(download_espeak_ng_for_piper)
include(FetchContent)
set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
- set(espeak_ng_URL2 "https://hub.nuaa.cf/csukuangfj/espeak-ng/archive/f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
+ set(espeak_ng_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
set(espeak_ng_HASH "SHA256=70cbf4050e7a014aae19140b05e57249da4720f56128459fbe3a93beaf971ae6")
set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE)
diff --git a/cmake/googletest.cmake b/cmake/googletest.cmake
index cf5fa10cc0..a9bfd443b0 100644
--- a/cmake/googletest.cmake
+++ b/cmake/googletest.cmake
@@ -2,7 +2,7 @@ function(download_googltest)
include(FetchContent)
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
- set(googletest_URL2 "https://hub.nuaa.cf/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
+ set(googletest_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")
# If you don't have access to the Internet,
diff --git a/cmake/hclust-cpp.cmake b/cmake/hclust-cpp.cmake
index 9040815255..c84ccafc83 100644
--- a/cmake/hclust-cpp.cmake
+++ b/cmake/hclust-cpp.cmake
@@ -3,6 +3,7 @@ function(download_hclust_cpp)
# The latest commit as of 2024.09.29
set(hclust_cpp_URL "https://github.com/csukuangfj/hclust-cpp/archive/refs/tags/2024-09-29.tar.gz")
+ set(hclust_cpp_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/hclust-cpp-2024-09-29.tar.gz")
set(hclust_cpp_HASH "SHA256=abab51448a3cb54272aae07522970306e0b2cc6479d59d7b19e7aee4d6cedd33")
# If you don't have access to the Internet,
@@ -20,6 +21,7 @@ function(download_hclust_cpp)
set(hclust_cpp_URL "${f}")
file(TO_CMAKE_PATH "${hclust_cpp_URL}" hclust_cpp_URL)
message(STATUS "Found local downloaded hclust_cpp: ${hclust_cpp_URL}")
+ set(hclust_cpp_URL2)
break()
endif()
endforeach()
diff --git a/cmake/kaldi-decoder.cmake b/cmake/kaldi-decoder.cmake
index d3d7ec2d56..91202342a5 100644
--- a/cmake/kaldi-decoder.cmake
+++ b/cmake/kaldi-decoder.cmake
@@ -2,7 +2,7 @@ function(download_kaldi_decoder)
include(FetchContent)
set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.6.tar.gz")
- set(kaldi_decoder_URL2 "https://hub.nuaa.cf/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.6.tar.gz")
+ set(kaldi_decoder_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-decoder-0.2.6.tar.gz")
set(kaldi_decoder_HASH "SHA256=b13c78b37495cafc6ef3f8a7b661b349c55a51abbd7f7f42f389408dcf86a463")
set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
diff --git a/cmake/kaldi-native-fbank.cmake b/cmake/kaldi-native-fbank.cmake
index 2d87b6a8b2..8f6803c888 100644
--- a/cmake/kaldi-native-fbank.cmake
+++ b/cmake/kaldi-native-fbank.cmake
@@ -2,7 +2,7 @@ function(download_kaldi_native_fbank)
include(FetchContent)
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.20.0.tar.gz")
- set(kaldi_native_fbank_URL2 "https://hub.nuaa.cf/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.20.0.tar.gz")
+ set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.20.0.tar.gz")
set(kaldi_native_fbank_HASH "SHA256=c6195b3cf374eef824644061d3c04f6b2a9267ae554169cbaa9865c89c1fe4f9")
set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE)
diff --git a/cmake/kaldifst.cmake b/cmake/kaldifst.cmake
index 765e2571a6..034d8c444a 100644
--- a/cmake/kaldifst.cmake
+++ b/cmake/kaldifst.cmake
@@ -2,7 +2,7 @@ function(download_kaldifst)
include(FetchContent)
set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
- set(kaldifst_URL2 "https://hub.nuaa.cf/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
+ set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.11.tar.gz")
set(kaldifst_HASH "SHA256=b43b3332faa2961edc730e47995a58cd4e22ead21905d55b0c4a41375b4a525f")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-aarch64-gpu.cmake b/cmake/onnxruntime-linux-aarch64-gpu.cmake
new file mode 100644
index 0000000000..5df32c996d
--- /dev/null
+++ b/cmake/onnxruntime-linux-aarch64-gpu.cmake
@@ -0,0 +1,119 @@
+# Copyright (c) 2022-2024 Xiaomi Corporation
+message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
+ message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
+endif()
+
+if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
+ message(FATAL_ERROR "This file is for aarch64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()
+
+if(NOT BUILD_SHARED_LIBS)
+ message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+endif()
+
+if(NOT SHERPA_ONNX_ENABLE_GPU)
+ message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
+endif()
+
+message(WARNING "\
+SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION: ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION}
+If you use Jetson Nano B01, then please pass
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
+to cmake (You need to make sure CUDA 10.2 is available on your board).
+
+If you use Jetson Orin NX, then please pass
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
+to cmake (You need to make sure CUDA 11.4 is available on your board).
+")
+
+set(v ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION})
+
+set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${v}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
+
+if(v STREQUAL "1.11.0")
+ set(onnxruntime_HASH "SHA256=36eded935551e23aead09d4173bdf0bd1e7b01fdec15d77f97d6e34029aa60d7")
+else()
+ set(onnxruntime_HASH "SHA256=4c09d5acf2c2682b4eab1dc2f1ad98fc1fde5f5f1960063e337983ba59379a4b")
+endif()
+
+# If you don't have access to the Internet,
+# please download onnxruntime to one of the following locations.
+# You can add more if you want.
+set(possible_file_locations
+ $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ /tmp/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+)
+
+foreach(f IN LISTS possible_file_locations)
+ if(EXISTS ${f})
+ set(onnxruntime_URL "${f}")
+ file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
+ message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
+ set(onnxruntime_URL2)
+ break()
+ endif()
+endforeach()
+
+FetchContent_Declare(onnxruntime
+ URL
+ ${onnxruntime_URL}
+ ${onnxruntime_URL2}
+ URL_HASH ${onnxruntime_HASH}
+)
+
+FetchContent_GetProperties(onnxruntime)
+if(NOT onnxruntime_POPULATED)
+ message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
+ FetchContent_Populate(onnxruntime)
+endif()
+message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
+
+find_library(location_onnxruntime onnxruntime
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+
+message(STATUS "location_onnxruntime: ${location_onnxruntime}")
+
+add_library(onnxruntime SHARED IMPORTED)
+
+set_target_properties(onnxruntime PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime}
+ INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
+)
+
+find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+
+add_library(onnxruntime_providers_cuda SHARED IMPORTED)
+set_target_properties(onnxruntime_providers_cuda PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime_cuda_lib}
+)
+message(STATUS "location_onnxruntime_cuda_lib: ${location_onnxruntime_cuda_lib}")
+
+# for libonnxruntime_providers_shared.so
+find_library(location_onnxruntime_providers_shared_lib onnxruntime_providers_shared
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+add_library(onnxruntime_providers_shared SHARED IMPORTED)
+set_target_properties(onnxruntime_providers_shared PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime_providers_shared_lib}
+)
+message(STATUS "location_onnxruntime_providers_shared_lib: ${location_onnxruntime_providers_shared_lib}")
+
+file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
+message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
+install(FILES ${onnxruntime_lib_files} DESTINATION lib)
diff --git a/cmake/onnxruntime-linux-aarch64-static.cmake b/cmake/onnxruntime-linux-aarch64-static.cmake
index 9606c79db7..4752e01020 100644
--- a/cmake/onnxruntime-linux-aarch64-static.cmake
+++ b/cmake/onnxruntime-linux-aarch64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=831b9a3869501040b4399de85f34c4f170e2bcbd41881edaeb553f8dc4080985")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-aarch64.cmake b/cmake/onnxruntime-linux-aarch64.cmake
index a6ef7ce122..a18f59e511 100644
--- a/cmake/onnxruntime-linux-aarch64.cmake
+++ b/cmake/onnxruntime-linux-aarch64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=6e0e68985f8dd1f643e5a4dbe7cd54c9e176a0cc62249c6bee0699b87fc6d4fb")
# If you don't have access to the Internet,
@@ -53,11 +53,7 @@ if(NOT onnxruntime_POPULATED)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
-find_library(location_onnxruntime onnxruntime
- PATHS
- "${onnxruntime_SOURCE_DIR}/lib"
- NO_CMAKE_SYSTEM_PATH
-)
+set(location_onnxruntime "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so")
message(STATUS "location_onnxruntime: ${location_onnxruntime}")
diff --git a/cmake/onnxruntime-linux-arm-static.cmake b/cmake/onnxruntime-linux-arm-static.cmake
index cf2269afbe..fa9170e34b 100644
--- a/cmake/onnxruntime-linux-arm-static.cmake
+++ b/cmake/onnxruntime-linux-arm-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-arm-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=3f2ba38156d2facfb732c0fe53bc1eaaf2791d9a91dd240380e3d53716798b09")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-arm.cmake b/cmake/onnxruntime-linux-arm.cmake
index a3adfaebd5..28bd426866 100644
--- a/cmake/onnxruntime-linux-arm.cmake
+++ b/cmake/onnxruntime-linux-arm.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-arm-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=4ec00f7adc7341c068babea3d0f607349655e598222d4212115ae4f52619efdb")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-riscv64-static.cmake b/cmake/onnxruntime-linux-riscv64-static.cmake
index b400c4741a..dec7cf1bb3 100644
--- a/cmake/onnxruntime-linux-riscv64-static.cmake
+++ b/cmake/onnxruntime-linux-riscv64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
set(onnxruntime_HASH "SHA256=77ecc51d8caf0953755db6edcdec2fc03bce3f6d379bedd635be50bb95f88da5")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-riscv64.cmake b/cmake/onnxruntime-linux-riscv64.cmake
index c773e5ecb7..121459326c 100644
--- a/cmake/onnxruntime-linux-riscv64.cmake
+++ b/cmake/onnxruntime-linux-riscv64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
set(onnxruntime_HASH "SHA256=c2cbc5af081ff82f46640befd85433811486daaf28e702163c6e4e75020fde81")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64-gpu.cmake b/cmake/onnxruntime-linux-x86_64-gpu.cmake
index 5407a0b824..7aed2526f4 100644
--- a/cmake/onnxruntime-linux-x86_64-gpu.cmake
+++ b/cmake/onnxruntime-linux-x86_64-gpu.cmake
@@ -20,7 +20,7 @@ endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=1261de176e8d9d4d2019f8fa8c732c6d11494f3c6e73168ab6d2cc0903f22551")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64-static.cmake b/cmake/onnxruntime-linux-x86_64-static.cmake
index c6bb867b25..f72f9ad5ea 100644
--- a/cmake/onnxruntime-linux-x86_64-static.cmake
+++ b/cmake/onnxruntime-linux-x86_64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
set(onnxruntime_HASH "SHA256=b646beeb983de843a267096d4457d832f93089f5e7264fd54b48cff207cb2068")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64.cmake b/cmake/onnxruntime-linux-x86_64.cmake
index eaa6f7608f..361f4d0d8f 100644
--- a/cmake/onnxruntime-linux-x86_64.cmake
+++ b/cmake/onnxruntime-linux-x86_64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=cb90c51a195bdd453aaf1582f3ef63b466dafbb15c4b8a552ca4dce3769e1d1e")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-arm64-static.cmake b/cmake/onnxruntime-osx-arm64-static.cmake
index 494e263ff1..7cd5a63ba7 100644
--- a/cmake/onnxruntime-osx-arm64-static.cmake
+++ b/cmake/onnxruntime-osx-arm64-static.cmake
@@ -13,7 +13,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=b88a4017251c159fea005aefe836bd0cf4d0bc7454e2810784f84a42143f17eb")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-arm64.cmake b/cmake/onnxruntime-osx-arm64.cmake
index 3998cc8b4d..e3c986a44d 100644
--- a/cmake/onnxruntime-osx-arm64.cmake
+++ b/cmake/onnxruntime-osx-arm64.cmake
@@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-arm64-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-arm64-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=89566f424624a7ad9a7d9d5e413c44b9639a994d7171cf409901d125b16e2bb3")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-universal-static.cmake b/cmake/onnxruntime-osx-universal-static.cmake
index 2abcf46b4f..5bf635b8e9 100644
--- a/cmake/onnxruntime-osx-universal-static.cmake
+++ b/cmake/onnxruntime-osx-universal-static.cmake
@@ -14,7 +14,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=45599dbd2fb9dd52d6505930c0e82ca165391e222a68f5606b9ea9d4f3922e15")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-universal.cmake b/cmake/onnxruntime-osx-universal.cmake
index 2b0fbb1108..fe5a53a634 100644
--- a/cmake/onnxruntime-osx-universal.cmake
+++ b/cmake/onnxruntime-osx-universal.cmake
@@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-universal2-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-universal2-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=9fa57fa6f202a373599377ef75064ae568fda8da838632b26a86024c7378d306")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-x86_64-static.cmake b/cmake/onnxruntime-osx-x86_64-static.cmake
index 259ec4d014..a3c98e709b 100644
--- a/cmake/onnxruntime-osx-x86_64-static.cmake
+++ b/cmake/onnxruntime-osx-x86_64-static.cmake
@@ -13,7 +13,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=5ff8efb97e50e257943c6c588328d2c57b649278098d3b468036f02755b60903")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-x86_64.cmake b/cmake/onnxruntime-osx-x86_64.cmake
index 81b78991ac..4ca9674608 100644
--- a/cmake/onnxruntime-osx-x86_64.cmake
+++ b/cmake/onnxruntime-osx-x86_64.cmake
@@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-x86_64-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-x86_64-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=86c6b6896434084ff5086eebc4e9ea90be1ed4d46743f92864f46ee43e7b5059")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-wasm-simd.cmake b/cmake/onnxruntime-wasm-simd.cmake
index dcc8fb5dd6..19ac0411c0 100644
--- a/cmake/onnxruntime-wasm-simd.cmake
+++ b/cmake/onnxruntime-wasm-simd.cmake
@@ -11,7 +11,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
set(onnxruntime_HASH "SHA256=8f07778e4233cf5a61a9d0795d90c5497177fbe8a46b701fda2d8d4e2b11cef8")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-arm64-static.cmake b/cmake/onnxruntime-win-arm64-static.cmake
new file mode 100644
index 0000000000..0ebbfc29a9
--- /dev/null
+++ b/cmake/onnxruntime-win-arm64-static.cmake
@@ -0,0 +1,72 @@
+# Copyright (c) 2022-2023 Xiaomi Corporation
+message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
+ message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
+endif()
+
+if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64))
+ message(FATAL_ERROR "This file is for Windows arm64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
+endif()
+
+if(BUILD_SHARED_LIBS)
+ message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+endif()
+
+if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
+ message(FATAL_ERROR "This file is for building a release version on Windows arm64")
+endif()
+
+set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_HASH "SHA256=534ab5bb8b5495ce45fed866cf3ec9034f89f2057a0152e49120b1088003a17e")
+
+# If you don't have access to the Internet,
+# please download onnxruntime to one of the following locations.
+# You can add more if you want.
+set(possible_file_locations
+ $ENV{HOME}/Downloads/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ ${CMAKE_SOURCE_DIR}/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ ${CMAKE_BINARY_DIR}/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ /tmp/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+)
+
+foreach(f IN LISTS possible_file_locations)
+ if(EXISTS ${f})
+ set(onnxruntime_URL "${f}")
+ file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
+ message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
+ set(onnxruntime_URL2)
+ break()
+ endif()
+endforeach()
+
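+# Both URLs point to the same archive. They are tried in order, so the
+# hf-mirror.com mirror is used only when the GitHub download fails.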
+FetchContent_Declare(onnxruntime
+ URL
+ ${onnxruntime_URL}
+ ${onnxruntime_URL2}
+ URL_HASH ${onnxruntime_HASH}
+)
+
+FetchContent_GetProperties(onnxruntime)
+if(NOT onnxruntime_POPULATED)
+ message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
+ FetchContent_Populate(onnxruntime)
+endif()
+message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
+
+# for static libraries, we use onnxruntime_lib_files directly below
+include_directories(${onnxruntime_SOURCE_DIR}/include)
+
+file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.lib")
+
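+# This file is include()d from the download_onnxruntime() function, so
+# PARENT_SCOPE exports the list of .lib files to that function's caller.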
+set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)
+
+message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
+if(SHERPA_ONNX_ENABLE_PYTHON)
+ install(FILES ${onnxruntime_lib_files} DESTINATION ..)
+else()
+ install(FILES ${onnxruntime_lib_files} DESTINATION lib)
+endif()
diff --git a/cmake/onnxruntime-win-arm64.cmake b/cmake/onnxruntime-win-arm64.cmake
index 0705b6451a..a4f247e34d 100644
--- a/cmake/onnxruntime-win-arm64.cmake
+++ b/cmake/onnxruntime-win-arm64.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-arm64-1.17.1.zip")
set(onnxruntime_HASH "SHA256=47782cebcab0fd7a1f0a3f0676b088c1bc0f4fbf21666f6fe57570dc362fa5a8")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64-directml.cmake b/cmake/onnxruntime-win-x64-directml.cmake
index 9648ffecce..a171a69a71 100644
--- a/cmake/onnxruntime-win-x64-directml.cmake
+++ b/cmake/onnxruntime-win-x64-directml.cmake
@@ -20,7 +20,7 @@ if(NOT SHERPA_ONNX_ENABLE_DIRECTML)
endif()
set(onnxruntime_URL "https://globalcdn.nuget.org/packages/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
-set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
set(onnxruntime_HASH "SHA256=c8ae7623385b19cd5de968d0df5383e13b97d1b3a6771c9177eac15b56013a5a")
# If you don't have access to the Internet,
@@ -158,4 +158,4 @@ file(GLOB directml_lib_files "${directml_SOURCE_DIR}/bin/x64-win/DirectML.*")
message(STATUS "DirectML lib files: ${directml_lib_files}")
install(FILES ${directml_lib_files} DESTINATION lib)
-install(FILES ${directml_lib_files} DESTINATION bin)
\ No newline at end of file
+install(FILES ${directml_lib_files} DESTINATION bin)
diff --git a/cmake/onnxruntime-win-x64-gpu.cmake b/cmake/onnxruntime-win-x64-gpu.cmake
index 18b64d01f7..5265653a57 100644
--- a/cmake/onnxruntime-win-x64-gpu.cmake
+++ b/cmake/onnxruntime-win-x64-gpu.cmake
@@ -20,7 +20,7 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-gpu-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-gpu-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-gpu-1.17.1.zip")
set(onnxruntime_HASH "SHA256=b7a66f50ad146c2ccb43471d2d3b5ad78084c2d4ddbd3ea82d65f86c867408b2")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64-static-debug.cmake b/cmake/onnxruntime-win-x64-static-debug.cmake
index 3281f4989a..211873cf31 100644
--- a/cmake/onnxruntime-win-x64-static-debug.cmake
+++ b/cmake/onnxruntime-win-x64-static-debug.cmake
@@ -16,7 +16,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set(onnxruntime_HASH "SHA256=ecc68d914541c3b6ebc36148af63fe2a6af0f4f955b35199d612698d23169fa5")
elseif(CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
diff --git a/cmake/onnxruntime-win-x64-static.cmake b/cmake/onnxruntime-win-x64-static.cmake
index 009390872c..811d647536 100644
--- a/cmake/onnxruntime-win-x64-static.cmake
+++ b/cmake/onnxruntime-win-x64-static.cmake
@@ -20,7 +20,7 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
set(onnxruntime_HASH "SHA256=42a0c02fda945d1d72433b2a7cdb2187d51cb4d7f3af462c6ae07b25314d5fb3")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64.cmake b/cmake/onnxruntime-win-x64.cmake
index 26f96fdb0c..4dbe0caa6f 100644
--- a/cmake/onnxruntime-win-x64.cmake
+++ b/cmake/onnxruntime-win-x64.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-1.17.1.zip")
set(onnxruntime_HASH "SHA256=4802af9598db02153d7da39432a48823ff69b2fb4b59155461937f20782aa91c")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x86-static-debug.cmake b/cmake/onnxruntime-win-x86-static-debug.cmake
index a8d6858c69..8f00f2a506 100644
--- a/cmake/onnxruntime-win-x86-static-debug.cmake
+++ b/cmake/onnxruntime-win-x86-static-debug.cmake
@@ -17,7 +17,7 @@ endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set(onnxruntime_HASH "SHA256=b08b223fe09a5640472eec487ff42e4df6bf726e8aba9de40f443a1fabea3334")
elseif(CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
diff --git a/cmake/onnxruntime-win-x86-static.cmake b/cmake/onnxruntime-win-x86-static.cmake
index 7e291a6164..ce424ee8cd 100644
--- a/cmake/onnxruntime-win-x86-static.cmake
+++ b/cmake/onnxruntime-win-x86-static.cmake
@@ -20,7 +20,7 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
set(onnxruntime_HASH "SHA256=52375d3fabc7b437c955a664bfeb9cb7a6391f5219c4b7d3b87ff690416d4b9e")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x86.cmake b/cmake/onnxruntime-win-x86.cmake
index 99ed71653a..cd8248300f 100644
--- a/cmake/onnxruntime-win-x86.cmake
+++ b/cmake/onnxruntime-win-x86.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x86-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x86-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-1.17.1.zip")
set(onnxruntime_HASH "SHA256=9404130825474bd36b2538ed925d6b5f2cf1fb6a443f3e125054ae3470019291")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
index 6655b45cd3..6ed15c29cb 100644
--- a/cmake/onnxruntime.cmake
+++ b/cmake/onnxruntime.cmake
@@ -13,7 +13,9 @@ function(download_onnxruntime)
include(onnxruntime-linux-riscv64-static)
endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
- if(BUILD_SHARED_LIBS)
+ if(SHERPA_ONNX_ENABLE_GPU)
+ include(onnxruntime-linux-aarch64-gpu)
+ elseif(BUILD_SHARED_LIBS)
include(onnxruntime-linux-aarch64)
else()
include(onnxruntime-linux-aarch64-static)
@@ -89,10 +91,11 @@ function(download_onnxruntime)
endif()
elseif(CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64)
# for 64-bit windows (arm64)
- if(NOT BUILD_SHARED_LIBS)
- message(FATAL_ERROR "Please pass -DBUILD_SHARED_LIBS=ON to cmake")
+ if(BUILD_SHARED_LIBS)
+ include(onnxruntime-win-arm64)
+ else()
+ include(onnxruntime-win-arm64-static)
endif()
- include(onnxruntime-win-arm64)
else()
# for 64-bit windows (x64)
if(SHERPA_ONNX_ENABLE_DIRECTML)
@@ -149,6 +152,8 @@ if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE)
if(DEFINED ENV{SHERPA_ONNXRUNTIME_LIB_DIR})
if(APPLE)
set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib)
+ elseif(WIN32)
+ set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/onnxruntime.lib)
else()
set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so)
endif()
@@ -195,6 +200,7 @@ if(location_onnxruntime_header_dir AND location_onnxruntime_lib)
add_library(onnxruntime SHARED IMPORTED)
set_target_properties(onnxruntime PROPERTIES
IMPORTED_LOCATION ${location_onnxruntime_lib}
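+    # On Windows, linking against an imported shared library also needs the
+    # import library (.lib); IMPORTED_IMPLIB points the linker at it.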
+ IMPORTED_IMPLIB ${location_onnxruntime_lib}
INTERFACE_INCLUDE_DIRECTORIES "${location_onnxruntime_header_dir}"
)
if(SHERPA_ONNX_ENABLE_GPU AND location_onnxruntime_cuda_lib)
diff --git a/cmake/openfst.cmake b/cmake/openfst.cmake
index 0f5863b7c9..2309c2fbe0 100644
--- a/cmake/openfst.cmake
+++ b/cmake/openfst.cmake
@@ -4,7 +4,7 @@ function(download_openfst)
include(FetchContent)
set(openfst_URL "https://github.com/csukuangfj/openfst/archive/refs/tags/sherpa-onnx-2024-06-19.tar.gz")
- set(openfst_URL2 "https://hub.nuaa.cf/csukuangfj/openfst/archive/refs/tags/sherpa-onnx-2024-06-19.tar.gz")
+ set(openfst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/openfst-sherpa-onnx-2024-06-19.tar.gz")
set(openfst_HASH "SHA256=5c98e82cc509c5618502dde4860b8ea04d843850ed57e6d6b590b644b268853d")
# If you don't have access to the Internet,
diff --git a/cmake/piper-phonemize.cmake b/cmake/piper-phonemize.cmake
index 7ecf1791b1..0e11fd1762 100644
--- a/cmake/piper-phonemize.cmake
+++ b/cmake/piper-phonemize.cmake
@@ -1,18 +1,18 @@
function(download_piper_phonemize)
include(FetchContent)
- set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/dc6b5f4441bffe521047086930b0fc12686acd56.zip")
- set(piper_phonemize_URL2 "https://hub.nuaa.cf/csukuangfj/piper-phonemize/archive/dc6b5f4441bffe521047086930b0fc12686acd56.zip")
- set(piper_phonemize_HASH "SHA256=b9faa04204b1756fa455a962abb1f037041c040133d55be58d11f11ab9b3ce14")
+ set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/78a788e0b719013401572d70fef372e77bff8e43.zip")
+ set(piper_phonemize_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip")
+ set(piper_phonemize_HASH "SHA256=89641a46489a4898754643ce57bda9c9b54b4ca46485fdc02bf0dc84b866645d")
# If you don't have access to the Internet,
# please pre-download piper-phonemize
set(possible_file_locations
- $ENV{HOME}/Downloads/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- ${CMAKE_SOURCE_DIR}/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- ${CMAKE_BINARY_DIR}/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- /tmp/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- /star-fj/fangjun/download/github/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
+ $ENV{HOME}/Downloads/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ ${CMAKE_SOURCE_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ ${CMAKE_BINARY_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ /tmp/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ /star-fj/fangjun/download/github/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
)
foreach(f IN LISTS possible_file_locations)
diff --git a/cmake/pybind11.cmake b/cmake/pybind11.cmake
index 0d4894eff6..bc06a3d1c8 100644
--- a/cmake/pybind11.cmake
+++ b/cmake/pybind11.cmake
@@ -1,18 +1,18 @@
function(download_pybind11)
include(FetchContent)
- set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz")
- set(pybind11_URL2 "https://hub.nuaa.cf/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz")
- set(pybind11_HASH "SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae")
+ set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz")
+ set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz")
+ set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7")
# If you don't have access to the Internet,
# please pre-download pybind11
set(possible_file_locations
- $ENV{HOME}/Downloads/pybind11-2.10.2.tar.gz
- ${CMAKE_SOURCE_DIR}/pybind11-2.10.2.tar.gz
- ${CMAKE_BINARY_DIR}/pybind11-2.10.2.tar.gz
- /tmp/pybind11-2.10.2.tar.gz
- /star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz
+ $ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz
+ ${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz
+ ${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz
+ /tmp/pybind11-2.12.0.tar.gz
+ /star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz
)
foreach(f IN LISTS possible_file_locations)
diff --git a/cmake/simple-sentencepiece.cmake b/cmake/simple-sentencepiece.cmake
index 09a640b11b..4b6750d0fc 100644
--- a/cmake/simple-sentencepiece.cmake
+++ b/cmake/simple-sentencepiece.cmake
@@ -2,7 +2,7 @@ function(download_simple_sentencepiece)
include(FetchContent)
set(simple-sentencepiece_URL "https://github.com/pkufool/simple-sentencepiece/archive/refs/tags/v0.7.tar.gz")
- set(simple-sentencepiece_URL2 "https://hub.nuaa.cf/pkufool/simple-sentencepiece/archive/refs/tags/v0.7.tar.gz")
+ set(simple-sentencepiece_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/simple-sentencepiece-0.7.tar.gz")
set(simple-sentencepiece_HASH "SHA256=1748a822060a35baa9f6609f84efc8eb54dc0e74b9ece3d82367b7119fdc75af")
# If you don't have access to the Internet,
diff --git a/cmake/websocketpp.cmake b/cmake/websocketpp.cmake
index 6ae9b89a3b..79b0585be4 100644
--- a/cmake/websocketpp.cmake
+++ b/cmake/websocketpp.cmake
@@ -3,7 +3,7 @@ function(download_websocketpp)
# The latest commit on the develop branch as of 2022-10-22
set(websocketpp_URL "https://github.com/zaphoyd/websocketpp/archive/b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
- set(websocketpp_URL2 "https://hub.nuaa.cf/zaphoyd/websocketpp/archive/b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
+ set(websocketpp_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
set(websocketpp_HASH "SHA256=1385135ede8191a7fbef9ec8099e3c5a673d48df0c143958216cd1690567f583")
# If you don't have access to the Internet,
diff --git a/cxx-api-examples/CMakeLists.txt b/cxx-api-examples/CMakeLists.txt
new file mode 100644
index 0000000000..fe21d580c8
--- /dev/null
+++ b/cxx-api-examples/CMakeLists.txt
@@ -0,0 +1,33 @@
+include_directories(${CMAKE_SOURCE_DIR})
+
+add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
+target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(kws-cxx-api ./kws-cxx-api.cc)
+target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc)
+target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
+target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(moonshine-cxx-api ./moonshine-cxx-api.cc)
+target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
+target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
+
+if(SHERPA_ONNX_ENABLE_TTS)
+ add_executable(matcha-tts-zh-cxx-api ./matcha-tts-zh-cxx-api.cc)
+ target_link_libraries(matcha-tts-zh-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
+ target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc)
+ target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc)
+ target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api)
+endif()
diff --git a/cxx-api-examples/kokoro-tts-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-en-cxx-api.cc
new file mode 100644
index 0000000000..66b28f03ef
--- /dev/null
+++ b/cxx-api-examples/kokoro-tts-en-cxx-api.cc
@@ -0,0 +1,73 @@
+// cxx-api-examples/kokoro-tts-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+./kokoro-tts-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
+ config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-kokoro-en-cxx.wav";
+ std::string text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
new file mode 100644
index 0000000000..c0228ad993
--- /dev/null
+++ b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
@@ -0,0 +1,74 @@
+// cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for Chinese + English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+./kokoro-tts-zh-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
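+  // 'lexicon' accepts a comma-separated list of files; the English and Chinese
+  // lexicons are combined below.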
+ config.model.kokoro.lexicon =
+ "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
+ "lexicon-zh.txt";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-kokoro-zh-en-cxx.wav";
+ std::string text =
+ "中英文语音合成测试。This is generated by next generation Kaldi using "
+ "Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 50;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/kws-cxx-api.cc b/cxx-api-examples/kws-cxx-api.cc
new file mode 100644
index 0000000000..12dc8d9cf0
--- /dev/null
+++ b/cxx-api-examples/kws-cxx-api.cc
@@ -0,0 +1,143 @@
+// cxx-api-examples/kws-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file demonstrates how to use the keyword spotter with sherpa-onnx's C++ API.
+// clang-format off
+//
+// Usage
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+//
+// ./kws-cxx-api
+//
+// clang-format on
+#include <array>
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+
+ KeywordSpotterConfig config;
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "tokens.txt";
+
+ config.model_config.provider = "cpu";
+ config.model_config.num_threads = 1;
+ config.model_config.debug = 1;
+
+ config.keywords_file =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/test_keywords.txt";
+
+ KeywordSpotter kws = KeywordSpotter::Create(config);
+ if (!kws.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+
+ std::cout
+ << "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/3.wav";
+
+  std::array<float, 8000> tail_paddings = {0};  // 0.5 seconds at 16 kHz
+
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ OnlineStream stream = kws.CreateStream();
+ if (!stream.Get()) {
+ std::cerr << "Failed to create stream\n";
+ return -1;
+ }
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
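+  // Feed a short stretch of trailing silence so frames still buffered inside
+  // the streaming model are flushed and decoded.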
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+
+ // --------------------------------------------------------------------------
+
+ std::cout << "--Use pre-defined keywords + add a new keyword--\n";
+
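+  // A keyword is given as its token sequence followed by '@' and the display
+  // text shown in results; multiple keywords are separated by '/'.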
+ stream = kws.CreateStream("y ǎn y uán @演员");
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+
+ // --------------------------------------------------------------------------
+
+ std::cout << "--Use pre-defined keywords + add two new keywords--\n";
+
+ stream = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名");
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+ return 0;
+}
diff --git a/cxx-api-examples/matcha-tts-en-cxx-api.cc b/cxx-api-examples/matcha-tts-en-cxx-api.cc
new file mode 100644
index 0000000000..ef4187d060
--- /dev/null
+++ b/cxx-api-examples/matcha-tts-en-cxx-api.cc
@@ -0,0 +1,80 @@
+// cxx-api-examples/matcha-tts-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for English TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
+
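+  // MatchaTTS predicts a mel spectrogram; the HiFi-GAN vocoder below converts
+  // it to waveform samples.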
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+
+ config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
+
+ config.model.matcha.data_dir =
+ "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
+
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-matcha-en-cxx.wav";
+ std::string text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/matcha-tts-zh-cxx-api.cc b/cxx-api-examples/matcha-tts-zh-cxx-api.cc
new file mode 100644
index 0000000000..f63065994f
--- /dev/null
+++ b/cxx-api-examples/matcha-tts-zh-cxx-api.cc
@@ -0,0 +1,79 @@
+// cxx-api-examples/matcha-tts-zh-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for Chinese TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-zh-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-zh-baker/model-steps-3.onnx";
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+ config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
+ config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
+ config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ // clang-format off
+ config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"; // NOLINT
+ // clang-format on
+
+ std::string filename = "./generated-matcha-zh-cxx.wav";
+ std::string text =
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
+ "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
+ "受着生命的奇迹与温柔."
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
+ "经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/moonshine-cxx-api.cc b/cxx-api-examples/moonshine-cxx-api.cc
new file mode 100644
index 0000000000..c2ce565c3b
--- /dev/null
+++ b/cxx-api-examples/moonshine-cxx-api.cc
@@ -0,0 +1,81 @@
+// cxx-api-examples/moonshine-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use Moonshine with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
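+  // Moonshine is packaged as four ONNX files: a preprocessor, an encoder and
+  // two decoders (the cached decoder reuses attention state between steps).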
+ config.model_config.moonshine.preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ config.model_config.moonshine.encoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ config.model_config.moonshine.uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ config.model_config.moonshine.cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ config.model_config.tokens =
+ "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/sense-voice-cxx-api.cc b/cxx-api-examples/sense-voice-cxx-api.cc
new file mode 100644
index 0000000000..ea642b9802
--- /dev/null
+++ b/cxx-api-examples/sense-voice-cxx-api.cc
@@ -0,0 +1,78 @@
+// cxx-api-examples/sense-voice-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use sense voice with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
+ config.model_config.sense_voice.model =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
+ config.model_config.sense_voice.use_itn = true;
+ config.model_config.sense_voice.language = "auto";
+ config.model_config.tokens =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav";
+
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/streaming-zipformer-cxx-api.cc b/cxx-api-examples/streaming-zipformer-cxx-api.cc
new file mode 100644
index 0000000000..ac4abc4796
--- /dev/null
+++ b/cxx-api-examples/streaming-zipformer-cxx-api.cc
@@ -0,0 +1,93 @@
+// cxx-api-examples/streaming-zipformer-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use streaming Zipformer
+// with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OnlineRecognizerConfig config;
+
+ // please see
+ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "encoder-epoch-99-avg-1.int8.onnx";
+
+ // Note: We recommend not using int8.onnx for the decoder.
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "decoder-epoch-99-avg-1.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "joiner-epoch-99-avg-1.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OnlineRecognizer recongizer = OnlineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
+ "0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OnlineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+ stream.InputFinished();
+
+ while (recongizer.IsReady(&stream)) {
+ recongizer.Decode(&stream);
+ }
+
+ OnlineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
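+  // RTF = processing time / audio duration; RTF < 1 means decoding ran faster
+  // than real time on this machine.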
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc b/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
new file mode 100644
index 0000000000..2e74d30bec
--- /dev/null
+++ b/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
@@ -0,0 +1,132 @@
+// cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use streaming Zipformer
+// with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// cd /path/sherpa-onnx/
+// mkdir build
+// cd build
+// cmake ..
+// make
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+//
+// # 1. Test on CPU, run once
+//
+// ./bin/streaming-zipformer-rtf-cxx-api
+//
+// # 2. Test on CPU, run 10 times
+//
+// ./bin/streaming-zipformer-rtf-cxx-api 10
+//
+// # 3. Test on GPU, run 10 times
+//
+// ./bin/streaming-zipformer-rtf-cxx-api 10 cuda
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main(int argc, char *argv[]) {
+ int32_t num_runs = 1;
+ if (argc >= 2) {
+ num_runs = atoi(argv[1]);
+ if (num_runs < 0) {
+ num_runs = 1;
+ }
+ }
+
+ bool use_gpu = (argc == 3);
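+  // Any third command-line argument switches the provider from CPU to CUDA.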
+
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OnlineRecognizerConfig config;
+
+ // please see
+ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "encoder-epoch-99-avg-1.int8.onnx";
+
+ // Note: We recommend not using int8.onnx for the decoder.
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "decoder-epoch-99-avg-1.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "joiner-epoch-99-avg-1.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
+
+ config.model_config.num_threads = 1;
+ config.model_config.provider = use_gpu ? "cuda" : "cpu";
+
+ std::cout << "Loading model\n";
+ OnlineRecognizer recongizer = OnlineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
+ "0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ float total_elapsed_seconds = 0;
+ OnlineRecognizerResult result;
+ for (int32_t i = 0; i < num_runs; ++i) {
+ const auto begin = std::chrono::steady_clock::now();
+
+ OnlineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+ stream.InputFinished();
+
+ while (recongizer.IsReady(&stream)) {
+ recongizer.Decode(&stream);
+ }
+
+ result = recongizer.GetResult(&stream);
+
+ auto end = std::chrono::steady_clock::now();
+ float elapsed_seconds =
+        std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+ printf("Run %d/%d, elapsed seconds: %.3f\n", i, num_runs, elapsed_seconds);
+ total_elapsed_seconds += elapsed_seconds;
+ }
+ float average_elapsed_secodns = total_elapsed_seconds / num_runs;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = total_elapsed_seconds / num_runs / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Total Elapsed seconds: %.3fs\n", total_elapsed_seconds);
+ printf("Num runs: %d\n", num_runs);
+ printf("Elapsed seconds per run: %.3f/%d=%.3f\n", total_elapsed_seconds,
+ num_runs, average_elapsed_secodns);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n",
+ average_elapsed_secodns, duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/whisper-cxx-api.cc b/cxx-api-examples/whisper-cxx-api.cc
new file mode 100644
index 0000000000..348d115bd3
--- /dev/null
+++ b/cxx-api-examples/whisper-cxx-api.cc
@@ -0,0 +1,76 @@
+// cxx-api-examples/whisper-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use whisper with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+// rm sherpa-onnx-whisper-tiny.en.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
+ config.model_config.whisper.encoder =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
+ config.model_config.whisper.decoder =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
+ config.model_config.tokens =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/dart-api-examples/README.md b/dart-api-examples/README.md
index 9370372e79..3d66cb04ea 100644
--- a/dart-api-examples/README.md
+++ b/dart-api-examples/README.md
@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
| Directory | Description |
|-----------|-------------|
+| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
diff --git a/dart-api-examples/add-punctuations/pubspec.yaml b/dart-api-examples/add-punctuations/pubspec.yaml
index 9c03139c3b..801744d95d 100644
--- a/dart-api-examples/add-punctuations/pubspec.yaml
+++ b/dart-api-examples/add-punctuations/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/audio-tagging/pubspec.yaml b/dart-api-examples/audio-tagging/pubspec.yaml
index bae6aa72cb..fb470975a1 100644
--- a/dart-api-examples/audio-tagging/pubspec.yaml
+++ b/dart-api-examples/audio-tagging/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart b/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
index ebef1fd7c5..47d587989f 100644
--- a/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
+++ b/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
@@ -73,6 +73,8 @@ void main(List<String> arguments) async {
spotter.decode(stream);
final result = spotter.getResult(stream);
if (result.keyword != '') {
+ // Remember to reset the stream right after detecting a keyword
+ spotter.reset(stream);
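+      // Resetting clears the spotter's internal state for this stream so
+      // that subsequent keywords are detected independently.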
print('Detected: ${result.keyword}');
}
}
diff --git a/dart-api-examples/keyword-spotter/pubspec.yaml b/dart-api-examples/keyword-spotter/pubspec.yaml
index eeae130c88..cd86f374d6 100644
--- a/dart-api-examples/keyword-spotter/pubspec.yaml
+++ b/dart-api-examples/keyword-spotter/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
diff --git a/dart-api-examples/non-streaming-asr/bin/moonshine.dart b/dart-api-examples/non-streaming-asr/bin/moonshine.dart
new file mode 100644
index 0000000000..68b653648e
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/bin/moonshine.dart
@@ -0,0 +1,69 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('preprocessor',
+ help: 'Path to the moonshine preprocessor model')
+ ..addOption('encoder', help: 'Path to the moonshine encoder model')
+ ..addOption('uncached-decoder',
+ help: 'Path to moonshine uncached decoder model')
+ ..addOption('cached-decoder',
+ help: 'Path to moonshine cached decoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+ final res = parser.parse(arguments);
+ if (res['preprocessor'] == null ||
+ res['encoder'] == null ||
+ res['uncached-decoder'] == null ||
+ res['cached-decoder'] == null ||
+ res['tokens'] == null ||
+ res['input-wav'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+
+ final preprocessor = res['preprocessor'] as String;
+ final encoder = res['encoder'] as String;
+ final uncachedDecoder = res['uncached-decoder'] as String;
+ final cachedDecoder = res['cached-decoder'] as String;
+ final tokens = res['tokens'] as String;
+ final inputWav = res['input-wav'] as String;
+
+ final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+ preprocessor: preprocessor,
+ encoder: encoder,
+ uncachedDecoder: uncachedDecoder,
+ cachedDecoder: cachedDecoder,
+ );
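+  // Note: the uncached decoder handles the first decoding step (no attention
+  // cache exists yet); the cached decoder reuses the cache for later tokens.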
+
+ final modelConfig = sherpa_onnx.OfflineModelConfig(
+ moonshine: moonshine,
+ tokens: tokens,
+ debug: false,
+ numThreads: 1,
+ );
+ final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+ final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+ final waveData = sherpa_onnx.readWave(inputWav);
+ final stream = recognizer.createStream();
+
+ stream.acceptWaveform(
+ samples: waveData.samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ print(result.text);
+
+ stream.free();
+ recognizer.free();
+}
diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml
index d348b84515..fd93095daf 100644
--- a/dart-api-examples/non-streaming-asr/pubspec.yaml
+++ b/dart-api-examples/non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/non-streaming-asr/run-moonshine.sh b/dart-api-examples/non-streaming-asr/run-moonshine.sh
new file mode 100755
index 0000000000..213a230d0e
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/run-moonshine.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+dart run \
+ ./bin/moonshine.dart \
+ --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --input-wav ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
diff --git a/dart-api-examples/speaker-diarization/.gitignore b/dart-api-examples/speaker-diarization/.gitignore
new file mode 100644
index 0000000000..3a85790408
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/.gitignore
@@ -0,0 +1,3 @@
+# https://dart.dev/guides/libraries/private-files
+# Created by `dart pub`
+.dart_tool/
diff --git a/dart-api-examples/speaker-diarization/CHANGELOG.md b/dart-api-examples/speaker-diarization/CHANGELOG.md
new file mode 100644
index 0000000000..effe43c82c
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/CHANGELOG.md
@@ -0,0 +1,3 @@
+## 1.0.0
+
+- Initial version.
diff --git a/dart-api-examples/speaker-diarization/README.md b/dart-api-examples/speaker-diarization/README.md
new file mode 100644
index 0000000000..d4d8c4fd27
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/README.md
@@ -0,0 +1,7 @@
+# Introduction
+
+This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
+
+# Usage
+
+Please see [./run.sh](./run.sh)
diff --git a/dart-api-examples/speaker-diarization/analysis_options.yaml b/dart-api-examples/speaker-diarization/analysis_options.yaml
new file mode 100644
index 0000000000..dee8927aaf
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/analysis_options.yaml
@@ -0,0 +1,30 @@
+# This file configures the static analysis results for your project (errors,
+# warnings, and lints).
+#
+# This enables the 'recommended' set of lints from `package:lints`.
+# This set helps identify many issues that may lead to problems when running
+# or consuming Dart code, and enforces writing Dart using a single, idiomatic
+# style and format.
+#
+# If you want a smaller set of lints you can change this to specify
+# 'package:lints/core.yaml'. These are just the most critical lints
+# (the recommended set includes the core lints).
+# The core lints are also what is used by pub.dev for scoring packages.
+
+include: package:lints/recommended.yaml
+
+# Uncomment the following section to specify additional rules.
+
+# linter:
+# rules:
+# - camel_case_types
+
+# analyzer:
+# exclude:
+# - path/to/excluded/files/**
+
+# For more information about the core and recommended set of lints, see
+# https://dart.dev/go/core-lints
+
+# For additional information about configuring this file, see
+# https://dart.dev/guides/language/analysis-options
diff --git a/dart-api-examples/speaker-diarization/bin/init.dart b/dart-api-examples/speaker-diarization/bin/init.dart
new file mode 120000
index 0000000000..48508cfd39
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/bin/init.dart
@@ -0,0 +1 @@
+../../vad/bin/init.dart
\ No newline at end of file
diff --git a/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
new file mode 100644
index 0000000000..760adc8680
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
@@ -0,0 +1,100 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+import 'dart:ffi';
+
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ /* Please use the following commands to download files used in this file
+ Step 1: Download a speaker segmentation model
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+ for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+ Step 2: Download a speaker embedding extractor model
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+ for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+ Step 3. Download test wave files
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+ for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+ Step 4. Run it
+ */
+
+ final segmentationModel =
+ "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+
+ final embeddingModel =
+ "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ final waveFilename = "./0-four-speakers-zh.wav";
+
+ final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
+ pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
+ model: segmentationModel),
+ );
+
+ final embeddingConfig =
+ sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);
+
+  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
+  // numClusters to 4. If you don't know the exact number of speakers, set it
+  // to -1 and set threshold instead. A larger threshold leads to fewer
+  // clusters, i.e., fewer speakers.
+ final clusteringConfig =
+ sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);
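+  // A sketch of the threshold-only setup when the speaker count is unknown
+  // (assuming the same FastClusteringConfig API):
+  //   final clusteringConfig =
+  //       sherpa_onnx.FastClusteringConfig(numClusters: -1, threshold: 0.5);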
+
+ var config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
+ segmentation: segmentationConfig,
+ embedding: embeddingConfig,
+ clustering: clusteringConfig,
+ minDurationOn: 0.2,
+ minDurationOff: 0.5);
+
+ final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
+ if (sd.ptr == nullptr) {
+ return;
+ }
+
+ final waveData = sherpa_onnx.readWave(waveFilename);
+ if (sd.sampleRate != waveData.sampleRate) {
+ print(
+ 'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
+ return;
+ }
+
+ print('started');
+
+ // Use the following statement if you don't want to use a callback
+ // final segments = sd.process(samples: waveData.samples);
+
+ final segments = sd.processWithCallback(
+ samples: waveData.samples,
+ callback: (int numProcessedChunk, int numTotalChunks) {
+ final progress = 100.0 * numProcessedChunk / numTotalChunks;
+
+ print('Progress ${progress.toStringAsFixed(2)}%');
+
+ return 0;
+ });
+
+ for (int i = 0; i < segments.length; ++i) {
+ print(
+ '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}');
+ }
+}
diff --git a/dart-api-examples/speaker-diarization/pubspec.yaml b/dart-api-examples/speaker-diarization/pubspec.yaml
new file mode 100644
index 0000000000..7f18b469cb
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/pubspec.yaml
@@ -0,0 +1,17 @@
+name: speaker_diarization
+description: >
+ This example demonstrates how to use the Dart API for speaker diarization.
+
+version: 1.0.0
+
+environment:
+ sdk: ">=3.0.0 <4.0.0"
+
+dependencies:
+ sherpa_onnx: ^1.10.42
+ # sherpa_onnx:
+ # path: ../../flutter/sherpa_onnx
+ path: ^1.9.0
+
+dev_dependencies:
+ lints: ^3.0.0
diff --git a/dart-api-examples/speaker-diarization/run.sh b/dart-api-examples/speaker-diarization/run.sh
new file mode 100755
index 0000000000..7717870dce
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/run.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+dart run ./bin/speaker-diarization.dart
diff --git a/dart-api-examples/speaker-identification/pubspec.yaml b/dart-api-examples/speaker-identification/pubspec.yaml
index fa31c272e2..6608cf8215 100644
--- a/dart-api-examples/speaker-identification/pubspec.yaml
+++ b/dart-api-examples/speaker-identification/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/streaming-asr/pubspec.yaml b/dart-api-examples/streaming-asr/pubspec.yaml
index 24bd1decca..6289e1acca 100644
--- a/dart-api-examples/streaming-asr/pubspec.yaml
+++ b/dart-api-examples/streaming-asr/pubspec.yaml
@@ -11,7 +11,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/tts/bin/kokoro-en.dart b/dart-api-examples/tts/bin/kokoro-en.dart
new file mode 100644
index 0000000000..b92d92883f
--- /dev/null
+++ b/dart-api-examples/tts/bin/kokoro-en.dart
@@ -0,0 +1,86 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('model', help: 'Path to the onnx model')
+ ..addOption('voices', help: 'Path to the voices.bin')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['model'] == null ||
+ res['voices'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final model = res['model'] as String;
+ final voices = res['voices'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: model,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ );
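+  // lengthScale is the inverse of the speaking speed: values above 1 slow
+  // the speech down, values below 1 speed it up.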
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ kokoro: kokoro,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/kokoro-zh-en.dart b/dart-api-examples/tts/bin/kokoro-zh-en.dart
new file mode 100644
index 0000000000..31ee4c49fb
--- /dev/null
+++ b/dart-api-examples/tts/bin/kokoro-zh-en.dart
@@ -0,0 +1,102 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('model', help: 'Path to the onnx model')
+ ..addOption('voices', help: 'Path to the voices.bin')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption(
+ 'dict-dir',
+ help: 'Path to dict directory',
+ defaultsTo: '',
+ )
+ ..addOption(
+ 'lexicon',
+ help: 'Path to lexicon files',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['model'] == null ||
+ res['voices'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['dict-dir'] == null ||
+ res['lexicon'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final model = res['model'] as String;
+ final voices = res['voices'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final dictDir = res['dict-dir'] as String;
+ final lexicon = res['lexicon'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: model,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ dictDir: dictDir,
+ lexicon: lexicon,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ kokoro: kokoro,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/matcha-en.dart b/dart-api-examples/tts/bin/matcha-en.dart
new file mode 100644
index 0000000000..fa4c076530
--- /dev/null
+++ b/dart-api-examples/tts/bin/matcha-en.dart
@@ -0,0 +1,86 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('acoustic-model', help: 'Path to the acoustic model')
+ ..addOption('vocoder', help: 'Path to the vocoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['acoustic-model'] == null ||
+ res['vocoder'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final acousticModel = res['acoustic-model'] as String;
+ final vocoder = res['vocoder'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
+ acousticModel: acousticModel,
+ vocoder: vocoder,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ matcha: matcha,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/matcha-zh.dart b/dart-api-examples/tts/bin/matcha-zh.dart
new file mode 100644
index 0000000000..d52175e747
--- /dev/null
+++ b/dart-api-examples/tts/bin/matcha-zh.dart
@@ -0,0 +1,90 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('acoustic-model', help: 'Path to the acoustic model')
+ ..addOption('vocoder', help: 'Path to the vocoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('lexicon', help: 'Path to lexicon.txt')
+ ..addOption(
+ 'dict-dir',
+ help: 'Path to jieba dict directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['acoustic-model'] == null ||
+ res['vocoder'] == null ||
+ res['lexicon'] == null ||
+ res['tokens'] == null ||
+ res['dict-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final acousticModel = res['acoustic-model'] as String;
+ final vocoder = res['vocoder'] as String;
+ final lexicon = res['lexicon'] as String;
+ final tokens = res['tokens'] as String;
+ final dictDir = res['dict-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
+ acousticModel: acousticModel,
+ vocoder: vocoder,
+ lexicon: lexicon,
+ tokens: tokens,
+ dictDir: dictDir,
+ lengthScale: 1 / speed,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ matcha: matcha,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/zh.dart b/dart-api-examples/tts/bin/vits-zh.dart
similarity index 100%
rename from dart-api-examples/tts/bin/zh.dart
rename to dart-api-examples/tts/bin/vits-zh.dart
diff --git a/dart-api-examples/tts/pubspec.yaml b/dart-api-examples/tts/pubspec.yaml
index 51e7a9d9ec..860ed94404 100644
--- a/dart-api-examples/tts/pubspec.yaml
+++ b/dart-api-examples/tts/pubspec.yaml
@@ -8,7 +8,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/tts/run-kokoro-en.sh b/dart-api-examples/tts/run-kokoro-en.sh
new file mode 100755
index 0000000000..78e21a2860
--- /dev/null
+++ b/dart-api-examples/tts/run-kokoro-en.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dart run \
+ ./bin/kokoro-en.dart \
+ --model ./kokoro-en-v0_19/model.onnx \
+ --voices ./kokoro-en-v0_19/voices.bin \
+ --tokens ./kokoro-en-v0_19/tokens.txt \
+ --data-dir ./kokoro-en-v0_19/espeak-ng-data \
+ --sid 9 \
+ --speed 1.0 \
+ --output-wav kokoro-en-9.wav \
+ --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-kokoro-zh-en.sh b/dart-api-examples/tts/run-kokoro-zh-en.sh
new file mode 100755
index 0000000000..42e4851d88
--- /dev/null
+++ b/dart-api-examples/tts/run-kokoro-zh-en.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+dart run \
+ ./bin/kokoro-zh-en.dart \
+ --model ./kokoro-multi-lang-v1_0/model.onnx \
+ --voices ./kokoro-multi-lang-v1_0/voices.bin \
+ --tokens ./kokoro-multi-lang-v1_0/tokens.txt \
+ --data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --dict-dir ./kokoro-multi-lang-v1_0/dict \
+ --lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --sid 45 \
+ --speed 1.0 \
+ --output-wav kokoro-zh-en-45.wav \
+ --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-matcha-en.sh b/dart-api-examples/tts/run-matcha-en.sh
new file mode 100755
index 0000000000..f727ee5c82
--- /dev/null
+++ b/dart-api-examples/tts/run-matcha-en.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dart run \
+ ./bin/matcha-en.dart \
+ --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-en-1.wav \
+ --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-matcha-zh.sh b/dart-api-examples/tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..be95a827aa
--- /dev/null
+++ b/dart-api-examples/tts/run-matcha-zh.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dart run \
+ ./bin/matcha-zh.dart \
+ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens ./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir ./matcha-icefall-zh-baker/dict \
+ --rule-fsts ./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-zh-1.wav \
+ --text "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" \
+
+dart run \
+ ./bin/matcha-zh.dart \
+ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens ./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir ./matcha-icefall-zh-baker/dict \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-zh-2.wav \
+ --text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔." \
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-zh.sh b/dart-api-examples/tts/run-vits-zh.sh
similarity index 92%
rename from dart-api-examples/tts/run-zh.sh
rename to dart-api-examples/tts/run-vits-zh.sh
index 057260b619..2298f9eb16 100755
--- a/dart-api-examples/tts/run-zh.sh
+++ b/dart-api-examples/tts/run-vits-zh.sh
@@ -16,7 +16,7 @@ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
fi
dart run \
- ./bin/zh.dart \
+ ./bin/vits-zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
@@ -24,10 +24,10 @@ dart run \
--sid 2 \
--speed 1.0 \
--text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
- --output-wav zh-jieba-2.wav
+ --output-wav vits-zh-jieba-2.wav
dart run \
- ./bin/zh.dart \
+ ./bin/vits-zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
@@ -36,6 +36,6 @@ dart run \
--sid 3 \
--speed 1.0 \
--text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
- --output-wav zh-jieba-3.wav
+ --output-wav vits-zh-jieba-3.wav
ls -lh *.wav
diff --git a/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart b/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart
new file mode 100644
index 0000000000..f9d96e694b
--- /dev/null
+++ b/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart
@@ -0,0 +1,134 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+ ..addOption('preprocessor',
+ help: 'Path to the moonshine preprocessor model')
+ ..addOption('encoder', help: 'Path to the moonshine encoder model')
+ ..addOption('uncached-decoder',
+ help: 'Path to moonshine uncached decoder model')
+ ..addOption('cached-decoder',
+ help: 'Path to moonshine cached decoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+ final res = parser.parse(arguments);
+ if (res['silero-vad'] == null ||
+ res['preprocessor'] == null ||
+ res['encoder'] == null ||
+ res['uncached-decoder'] == null ||
+ res['cached-decoder'] == null ||
+ res['tokens'] == null ||
+ res['input-wav'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+
+ // create VAD
+ final sileroVad = res['silero-vad'] as String;
+
+ final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+ model: sileroVad,
+ minSilenceDuration: 0.25,
+ minSpeechDuration: 0.5,
+ maxSpeechDuration: 5.0,
+ );
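+  // All durations are in seconds: a pause shorter than minSilenceDuration
+  // does not end a segment, and a segment longer than maxSpeechDuration is
+  // split into smaller ones.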
+
+ final vadConfig = sherpa_onnx.VadModelConfig(
+ sileroVad: sileroVadConfig,
+ numThreads: 1,
+ debug: true,
+ );
+
+ final vad = sherpa_onnx.VoiceActivityDetector(
+ config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create moonshine recognizer
+ final preprocessor = res['preprocessor'] as String;
+ final encoder = res['encoder'] as String;
+ final uncachedDecoder = res['uncached-decoder'] as String;
+ final cachedDecoder = res['cached-decoder'] as String;
+ final tokens = res['tokens'] as String;
+ final inputWav = res['input-wav'] as String;
+
+ final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+ preprocessor: preprocessor,
+ encoder: encoder,
+ uncachedDecoder: uncachedDecoder,
+ cachedDecoder: cachedDecoder,
+ );
+ final modelConfig = sherpa_onnx.OfflineModelConfig(
+ moonshine: moonshine,
+ tokens: tokens,
+ debug: false,
+ numThreads: 1,
+ );
+ final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+ final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+ final waveData = sherpa_onnx.readWave(inputWav);
+ if (waveData.sampleRate != 16000) {
+ print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+ exit(1);
+ }
+
+ int numSamples = waveData.samples.length;
+ int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
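+  // Feed the waveform to the VAD in windowSize-sample chunks; any remainder
+  // shorter than one window is handled by vad.flush() after the loop.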
+
+ for (int i = 0; i != numIter; ++i) {
+ int start = i * vadConfig.sileroVad.windowSize;
+ vad.acceptWaveform(Float32List.sublistView(
+ waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+ while (!vad.isEmpty()) {
+ final samples = vad.front().samples;
+ final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+ final endTime =
+ startTime + samples.length.toDouble() / waveData.sampleRate;
+
+ final stream = recognizer.createStream();
+ stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ stream.free();
+ print(
+ '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+ vad.pop();
+ }
+ }
+
+ vad.flush();
+
+ while (!vad.isEmpty()) {
+ final samples = vad.front().samples;
+ final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+ final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+ final stream = recognizer.createStream();
+ stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ stream.free();
+ print(
+ '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+ vad.pop();
+ }
+
+ vad.free();
+
+ recognizer.free();
+}
diff --git a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
index 66d8ca1127..0de40da6f0 100644
--- a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
+++ b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh b/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh
new file mode 100755
index 0000000000..cd531fec5a
--- /dev/null
+++ b/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+ ./bin/moonshine.dart \
+ --silero-vad ./silero_vad.onnx \
+ --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --input-wav ./Obama.wav
diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml
index 2535e60748..9063da114b 100644
--- a/dart-api-examples/vad/pubspec.yaml
+++ b/dart-api-examples/vad/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dotnet-examples/Common/Common.csproj b/dotnet-examples/Common/Common.csproj
index a9630614f4..57c0ff743f 100644
--- a/dotnet-examples/Common/Common.csproj
+++ b/dotnet-examples/Common/Common.csproj
@@ -1,7 +1,7 @@
-    <TargetFramework>net6.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
true
diff --git a/dotnet-examples/Common/WaveHeader.cs b/dotnet-examples/Common/WaveHeader.cs
index 7d13b35537..0a6ca52845 100644
--- a/dotnet-examples/Common/WaveHeader.cs
+++ b/dotnet-examples/Common/WaveHeader.cs
@@ -4,171 +4,166 @@
using System.Runtime.InteropServices;
-namespace SherpaOnnx
-{
+namespace SherpaOnnx;
- [StructLayout(LayoutKind.Sequential)]
- public struct WaveHeader
+[StructLayout(LayoutKind.Sequential)]
+public struct WaveHeader
+{
+ public int ChunkID;
+ public int ChunkSize;
+ public int Format;
+ public int SubChunk1ID;
+ public int SubChunk1Size;
+ public short AudioFormat;
+ public short NumChannels;
+ public int SampleRate;
+ public int ByteRate;
+ public short BlockAlign;
+ public short BitsPerSample;
+ public int SubChunk2ID;
+ public int SubChunk2Size;
+
+ public bool Validate()
{
- public Int32 ChunkID;
- public Int32 ChunkSize;
- public Int32 Format;
- public Int32 SubChunk1ID;
- public Int32 SubChunk1Size;
- public Int16 AudioFormat;
- public Int16 NumChannels;
- public Int32 SampleRate;
- public Int32 ByteRate;
- public Int16 BlockAlign;
- public Int16 BitsPerSample;
- public Int32 SubChunk2ID;
- public Int32 SubChunk2Size;
-
- public bool Validate()
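+    // F F I R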
+ if (ChunkID != 0x46464952)
+ {
+ Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
+ return false;
+ }
+
+ // E V A W
+ if (Format != 0x45564157)
+ {
+ Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
+ return false;
+ }
+
+ // t m f
+ if (SubChunk1ID != 0x20746d66)
+ {
+ Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
+ return false;
+ }
+
+ if (SubChunk1Size != 16)
+ {
+ Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
+ return false;
+ }
+
+ if (AudioFormat != 1)
+ {
+ Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
+ return false;
+ }
+
+ if (NumChannels != 1)
+ {
+ Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
+ return false;
+ }
+
+ if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
+ {
+ Console.WriteLine($"Invalid byte rate: {ByteRate}.");
+ return false;
+ }
+
+ if (BlockAlign != (NumChannels * BitsPerSample / 8))
{
- if (ChunkID != 0x46464952)
- {
- Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
- return false;
- }
-
- // E V A W
- if (Format != 0x45564157)
- {
- Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
- return false;
- }
-
- // t m f
- if (SubChunk1ID != 0x20746d66)
- {
- Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
- return false;
- }
-
- if (SubChunk1Size != 16)
- {
- Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
- return false;
- }
-
- if (AudioFormat != 1)
- {
- Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
- return false;
- }
-
- if (NumChannels != 1)
- {
- Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
- return false;
- }
-
- if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
- {
- Console.WriteLine($"Invalid byte rate: {ByteRate}.");
- return false;
- }
-
- if (BlockAlign != (NumChannels * BitsPerSample / 8))
- {
- Console.WriteLine($"Invalid block align: {ByteRate}.");
- return false;
- }
-
- if (BitsPerSample != 16)
- { // we support only 16 bits per sample
- Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
- return false;
- }
-
- return true;
+ Console.WriteLine($"Invalid block align: {ByteRate}.");
+ return false;
}
+
+ if (BitsPerSample != 16)
+ { // we support only 16 bits per sample
+ Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
+ return false;
+ }
+
+ return true;
}
+}
- // It supports only 16-bit, single channel WAVE format.
- // The sample rate can be any value.
- public class WaveReader
+// It supports only 16-bit, single channel WAVE format.
+// The sample rate can be any value.
+public class WaveReader
+{
+ public WaveReader(string fileName)
{
- public WaveReader(String fileName)
+ if (!File.Exists(fileName))
{
- if (!File.Exists(fileName))
- {
- throw new ApplicationException($"{fileName} does not exist!");
- }
-
- using (var stream = File.Open(fileName, FileMode.Open))
- {
- using (var reader = new BinaryReader(stream))
- {
- _header = ReadHeader(reader);
-
- if (!_header.Validate())
- {
- throw new ApplicationException($"Invalid wave file ${fileName}");
- }
-
- SkipMetaData(reader);
-
- // now read samples
- // _header.SubChunk2Size contains number of bytes in total.
- // we assume each sample is of type int16
- byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);
- short[] samples_int16 = new short[_header.SubChunk2Size / 2];
- Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
-
- _samples = new float[samples_int16.Length];
-
- for (var i = 0; i < samples_int16.Length; ++i)
- {
- _samples[i] = samples_int16[i] / 32768.0F;
- }
- }
- }
+ throw new ApplicationException($"{fileName} does not exist!");
}
- private static WaveHeader ReadHeader(BinaryReader reader)
- {
- byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
+ using var stream = File.Open(fileName, FileMode.Open);
+ using var reader = new BinaryReader(stream);
- GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
- WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
- handle.Free();
+ _header = ReadHeader(reader);
- return header;
+ if (!_header.Validate())
+ {
+ throw new ApplicationException($"Invalid wave file ${fileName}");
}
- private void SkipMetaData(BinaryReader reader)
+ SkipMetaData(reader);
+
+ // now read samples
+ // _header.SubChunk2Size contains number of bytes in total.
+ // we assume each sample is of type int16
+ var buffer = reader.ReadBytes(_header.SubChunk2Size);
+ var samples_int16 = new short[_header.SubChunk2Size / 2];
+ Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
+
+ _samples = new float[samples_int16.Length];
+
+ for (var i = 0; i < samples_int16.Length; ++i)
{
- var bs = reader.BaseStream;
-
- Int32 subChunk2ID = _header.SubChunk2ID;
- Int32 subChunk2Size = _header.SubChunk2Size;
-
- while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
- {
- bs.Seek(subChunk2Size, SeekOrigin.Current);
- subChunk2ID = reader.ReadInt32();
- subChunk2Size = reader.ReadInt32();
- }
- _header.SubChunk2ID = subChunk2ID;
- _header.SubChunk2Size = subChunk2Size;
+ _samples[i] = samples_int16[i] / 32768.0F;
}
+ }
- private WaveHeader _header;
+ private static WaveHeader ReadHeader(BinaryReader reader)
+ {
+ var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
+
+ GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
+ WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
+ handle.Free();
+
+ return header;
+ }
- // Samples are normalized to the range [-1, 1]
- private float[] _samples;
+ private void SkipMetaData(BinaryReader reader)
+ {
+ var bs = reader.BaseStream;
- public int SampleRate => _header.SampleRate;
- public float[] Samples => _samples;
+ var subChunk2ID = _header.SubChunk2ID;
+ var subChunk2Size = _header.SubChunk2Size;
- public static void Test(String fileName)
+ while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
{
- WaveReader reader = new WaveReader(fileName);
- Console.WriteLine($"samples length: {reader.Samples.Length}");
- Console.WriteLine($"samples rate: {reader.SampleRate}");
+ bs.Seek(subChunk2Size, SeekOrigin.Current);
+ subChunk2ID = reader.ReadInt32();
+ subChunk2Size = reader.ReadInt32();
}
+ _header.SubChunk2ID = subChunk2ID;
+ _header.SubChunk2Size = subChunk2Size;
}
+ private WaveHeader _header;
+
+ // Samples are normalized to the range [-1, 1]
+ private float[] _samples;
+
+ public int SampleRate => _header.SampleRate;
+
+ public float[] Samples => _samples;
+
+ public static void Test(string fileName)
+ {
+ WaveReader reader = new WaveReader(fileName);
+ Console.WriteLine($"samples length: {reader.Samples.Length}");
+ Console.WriteLine($"samples rate: {reader.SampleRate}");
+ }
}
diff --git a/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs b/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs
deleted file mode 100644
index 1eb1e3568c..0000000000
--- a/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs
+++ /dev/null
@@ -1,44 +0,0 @@
-using NAudio.Wave;
-
-namespace TTS.Struct
-{
- public sealed partial class SherpaOnnxGeneratedAudioResult
- {
- private WaveOutEvent waveOut;
- private WaveFormat waveFormat;
- private BufferedWaveProvider bufferedWaveProvider;
-
- private int bufferLength = 1;
-
- public TimeSpan? AudioDuration => bufferedWaveProvider?.BufferedDuration;
-
- public float PlayProgress => (waveOut?.GetPosition() * 1.0f / bufferLength).Value;
-
- public void Play()
- {
- waveOut ??= new WaveOutEvent();
-
- waveFormat ??= new WaveFormat(sample_rate, AudioDataBit, Channels); // 32-bit 浮点,单声道
-
- if (bufferedWaveProvider == null)
- {
- bufferedWaveProvider ??= new BufferedWaveProvider(waveFormat);
-
- var buffer = AudioByteData;
-
- bufferLength = buffer.Length;
-
- bufferedWaveProvider.AddSamples(buffer, 0, bufferLength);
- bufferedWaveProvider.BufferLength = bufferLength;
- waveOut.Init(bufferedWaveProvider);
- }
- waveOut.Play();
- }
-
- public void Stop()
- {
- waveOut?.Stop();
- }
-
- }
-}
diff --git a/dotnet-examples/TTS/Program.cs b/dotnet-examples/TTS/Program.cs
deleted file mode 100644
index 07bb1325ff..0000000000
--- a/dotnet-examples/TTS/Program.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-using System.Text;
-using TTS;
-using TTS.Struct;
-
-internal class Program
-{
- private static void Main(string[] args)
- {
- SherpaOnnxOfflineTtsConfig sherpaOnnxOfflineTtsConfig = new SherpaOnnxOfflineTtsConfig();
- sherpaOnnxOfflineTtsConfig.model = new SherpaOnnxOfflineTtsModelConfig
- {
- debug = 0,
- num_threads = 4,
- provider = "cpu",
- vits = new SherpaOnnxOfflineTtsVitsModelConfig
- {
- //lexicon = "vits-zh-aishell3/lexicon.txt",
- //model = "vits-zh-aishell3/vits-aishell3.onnx",
- //tokens = "vits-zh-aishell3/tokens.txt",
- model = @"C:\Services\Sherpa\model.onnx",
- lexicon = "",
- tokens = @"C:\Services\Sherpa\tokens.txt",
- data_dir = @"C:\Services\Sherpa\espeak-ng-data",
-
- noise_scale = 0.667f,
- noise_scale_w = 0.8f,
- length_scale = 1,
- },
-
- };
-
- TTSCore i = new TTSCore(sherpaOnnxOfflineTtsConfig);
-
- Console.InputEncoding = Encoding.Unicode;
- Console.OutputEncoding = Encoding.UTF8;
-
- while (true)
- {
- var str = Console.ReadLine();
- var audioResult = i.ToSpeech(str, 40, 1f);
-
- // audioResult.WriteWAVFile("123.wav");保存本地
-
- audioResult.Play();
-
- int lastIndex = -1;
- while (audioResult.PlayProgress <= 1f)
- {
- int index = (int)(audioResult.PlayProgress * (str.Length - 1));
- if (lastIndex != index)
- {
- Console.Write(str[index]);
- lastIndex = index;
- }
- Thread.Sleep(100);
- }
-
- if (++lastIndex < str.Length)
- Console.Write(str[lastIndex]);
-
- Console.WriteLine();
-
- }
-
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs b/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs
deleted file mode 100644
index affc3a0347..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs
+++ /dev/null
@@ -1,198 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Runtime.InteropServices;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace TTS.Struct
-{
- ///
- /// 生成语音结果
- ///
- public sealed partial class SherpaOnnxGeneratedAudioResult : IDisposable
- {
- public const string Filename = "sherpa-onnx-c-api";
-
- ///
- /// 销毁非托管内存
- ///
- ///
- [DllImport(Filename)]
- private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr ttsGenerateIntptr);
-
- [DllImport(Filename)]
- private static extern int SherpaOnnxWriteWave(IntPtr q, int n, int sample_rate, string filename);
-
- ///
- /// 音频数据比特
- ///
- public const int AudioDataBit = 16;
- ///
- /// 单通道
- ///
- public const int Channels = 1;
-
- ///
- /// 原生句柄
- ///
- internal IntPtr thisHandle;
-
- internal readonly IntPtr audioData;
- internal readonly int dataSize;
-
- ///
- /// 采样率
- ///
- public readonly int sample_rate;
-
- ///
- /// 音频数据指针
- ///
- public IntPtr AudioDataIntPtr => audioData;
-
- ///
- /// 数据的大小
- ///
- public unsafe int AudioDataLength
- {
- get
- {
- return dataSize;
-
- //float* buffer = (float*)audioData;
- //while (*buffer != 0)
- // ++buffer;
- //return (int)(buffer - (float*)audioData);
- }
- }
-
- ///
- /// 获得音频数据 float[]
- /// 这个内部创建一个数组
- ///
- public unsafe float[] AudioFloatData
- {
- get
- {
- int length = AudioDataLength;
-
- float[] floatAudioData = new float[length];
- Marshal.Copy(audioData, floatAudioData, 0, floatAudioData.Length);
- return floatAudioData;
- }
- }
-
-
- ///
- /// 获得音频数据 byte[]
- /// 这个内部创建一个数组
- ///
- public byte[] AudioByteData
- {
- get
- {
- byte[] bytes = new byte[AudioDataLength * 2];
- ReadData(bytes, 0);
- return bytes;
- }
- }
-
- internal SherpaOnnxGeneratedAudioResult(IntPtr intPtr, SherpaOnnxGeneratedAudio sherpaOnnx)
- {
- this.thisHandle = intPtr;
- this.audioData = sherpaOnnx.audioData;
- this.dataSize = sherpaOnnx.dataSize;
- this.sample_rate = sherpaOnnx.sample_rate;
- }
-
- ~SherpaOnnxGeneratedAudioResult()
- {
- Dispose();
- }
-
- ///
- /// 读取数据
- /// 没有垃圾产生,自己传递数组进来
- ///
- /// 数组
- /// 数组那个位置写入
- /// 写入了多少个
- public int ReadData(float[] audioFloats, int offset)
- {
- int length = AudioDataLength;
-
- int c = audioFloats.Length - offset;
- length = c >= length ? length : c;
-
- Marshal.Copy(audioData, audioFloats, offset, length);
- return length;
- }
-
- ///
- /// 读取数据
- /// 这个内部转换成byte[] 音频数组
- /// 没有垃圾产生,自己传递数组进来
- ///
- /// 数组,这个长度需要是AudioDataLength*2大小
- /// 数组那个位置写入
- /// 写入了多少个
- public int ReadData(byte[] audioFloats, int offset)
- {
- //因为是16bit存储音频数据,所以float会转换成两个字节存储
- var audiodata = AudioFloatData;
-
- int length = audiodata.Length * 2;
-
- int c = audioFloats.Length - offset;
- c = c % 2 == 0 ? c : c - 1;
-
- length = c >= length ? length : c;
-
- int p = length / 2;
-
- for (int i = 0; i < p; i++)
- {
- short value = (short)(audiodata[i] * short.MaxValue);
-
- audioFloats[offset++] = (byte)value;
- audioFloats[offset++] = (byte)(value >> 8);
- }
-
- return length;
-
- }
-
- ///
- /// 写入WAV音频数据
- ///
- ///
- ///
- public bool WriteWAVFile(string filename)
- {
- return 1 == SherpaOnnxWriteWave(audioData, this.dataSize, this.sample_rate, filename);
- }
-
- public void Dispose()
- {
- if (this.thisHandle != IntPtr.Zero)
- {
- SherpaOnnxDestroyOfflineTtsGeneratedAudio(this.thisHandle);
- GC.SuppressFinalize(this);
- this.thisHandle = IntPtr.Zero;
- }
- }
- }
-
- [StructLayout(LayoutKind.Sequential)]
- internal struct SherpaOnnxGeneratedAudio
- {
- internal readonly IntPtr audioData;
- internal readonly int dataSize;
-
- ///
- /// 采样率
- ///
- public readonly int sample_rate;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs
deleted file mode 100644
index f33e37dcd7..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs
+++ /dev/null
@@ -1,18 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsConfig
- {
- public SherpaOnnxOfflineTtsModelConfig model;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string rule_fsts;
-
- public int max_num_sentences;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string rule_fars;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs
deleted file mode 100644
index 46dd558599..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs
+++ /dev/null
@@ -1,23 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsModelConfig
- {
- ///
- /// 模型配置
- ///
- public SherpaOnnxOfflineTtsVitsModelConfig vits;
- ///
- /// 线程数
- ///
- public int num_threads;
- public int debug;
- ///
- /// 使用cpu
- ///
- [MarshalAs(UnmanagedType.LPStr)]
- public string provider;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs
deleted file mode 100644
index 266df5ae74..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsVitsModelConfig
- {
- /// <summary>
- /// Model
- /// "vits-zh-aishell3/vits-aishell3.onnx"
- /// </summary>
- [MarshalAs(UnmanagedType.LPStr)]
- public string model;
- /// <summary>
- /// Lexicon file
- /// "vits-zh-aishell3/lexicon.txt"
- /// </summary>
- [MarshalAs(UnmanagedType.LPStr)]
- public string lexicon;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string tokens;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string data_dir;
-
- /// <summary>
- /// noise_scale for the VITS model (float, default = 0.667)
- /// </summary>
- public float noise_scale = 0.667f;
- /// <summary>
- /// noise_scale_w for the VITS model (float, default = 0.8)
- /// </summary>
- public float noise_scale_w = 0.8f;
- /// <summary>
- /// Speech speed. Larger -> slower; smaller -> faster. (float, default = 1)
- /// </summary>
- public float length_scale = 1f;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string dict_dir;
-
- public SherpaOnnxOfflineTtsVitsModelConfig()
- {
- noise_scale = 0.667f;
- noise_scale_w = 0.8f;
- length_scale = 1f;
-
- model = "vits-zh-aishell3/vits-aishell3.onnx";
- lexicon = "vits-zh-aishell3/lexicon.txt";
- tokens = "vits-zh-aishell3/tokens.txt";
- data_dir = "";
- dict_dir = "";
- }
- }
-}
diff --git a/dotnet-examples/TTS/TTS.csproj b/dotnet-examples/TTS/TTS.csproj
deleted file mode 100644
index cb1a419ea8..0000000000
--- a/dotnet-examples/TTS/TTS.csproj
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-
- Exe
- net6.0
- enable
- enable
- true
-
-
-
-
-
-
-
diff --git a/dotnet-examples/TTS/TTSCore.cs b/dotnet-examples/TTS/TTSCore.cs
deleted file mode 100644
index a15cb19e69..0000000000
--- a/dotnet-examples/TTS/TTSCore.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-using System.Runtime.InteropServices;
-using TTS.Struct;
-
-namespace TTS
-{
- internal sealed class TTSCore : IDisposable
- {
- public const string Filename = "sherpa-onnx-c-api";
-
- [DllImport(Filename, CallingConvention = CallingConvention.Cdecl)]
- private static extern IntPtr SherpaOnnxCreateOfflineTts(SherpaOnnxOfflineTtsConfig handle);
-
- [DllImport(Filename)]
- private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr createOfflineTtsIntptr, IntPtr text, int sid, float speed);
-
- [DllImport(Filename)]
- private static extern void SherpaOnnxDestroyOfflineTts(IntPtr intPtr);
-
- /// <summary>
- /// Native handle
- /// </summary>
- private IntPtr thisHandle;
-
- public TTSCore(SherpaOnnxOfflineTtsConfig modelConfig)
- {
- IntPtr ttsHandle = SherpaOnnxCreateOfflineTts(modelConfig);
- if (ttsHandle == IntPtr.Zero)
- {
- throw new InvalidOperationException("Failed to create SherpaOnnx TTS engine.");
- }
- thisHandle = ttsHandle;
- }
-
- /// <summary>
- /// Text to speech
- /// </summary>
- /// <param name="text">Text</param>
- /// <param name="sid">Speaker ID (voice)</param>
- /// <param name="speed">Speed</param>
- /// <returns></returns>
- public SherpaOnnxGeneratedAudioResult ToSpeech(string text, int sid, float speed = 1f)
- {
- var result = SherpaOnnxOfflineTtsGenerate(thisHandle, Marshal.StringToCoTaskMemUTF8(text), sid, speed);
- SherpaOnnxGeneratedAudio impl = (SherpaOnnxGeneratedAudio)Marshal.PtrToStructure(result, typeof(SherpaOnnxGeneratedAudio));
- return new SherpaOnnxGeneratedAudioResult(result, impl);
- }
-
- /// <summary>
- /// Text to speech
- /// </summary>
- /// <param name="text">Text</param>
- /// <param name="sid">Speaker ID (voice)</param>
- /// <param name="speed">Speed</param>
- /// <returns></returns>
- public Task ToSpeechAsync(string text, int sid, float speed = 1f)
- {
- return Task.Run(() => ToSpeech(text, sid, speed));
- }
-
- ~TTSCore()
- {
- Dispose();
- }
-
- public void Dispose()
- {
- if (this.thisHandle != IntPtr.Zero)
- {
- SherpaOnnxDestroyOfflineTts(this.thisHandle);
- GC.SuppressFinalize(this);
- this.thisHandle = IntPtr.Zero;
- }
- }
- }
-}
diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs
index 2fea260d1d..7ab0da2fa6 100644
--- a/dotnet-examples/keyword-spotting-from-files/Program.cs
+++ b/dotnet-examples/keyword-spotting-from-files/Program.cs
@@ -13,8 +13,6 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class KeywordSpotterDemo
{
@@ -38,11 +36,11 @@ static void Main(string[] args)
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------");
- OnlineStream s = kws.CreateStream();
+ var s = kws.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
@@ -53,8 +51,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
@@ -70,8 +70,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
@@ -89,8 +91,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
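Note on the three identical hunks above: the keyword spotter keeps reporting a detected keyword until the stream is reset, so Reset() has to be called as soon as a detection is seen. A minimal sketch of the intended decode loop, assuming the spotter type is SherpaOnnx.KeywordSpotter (as used elsewhere in these examples) and that the OnlineStream has already been fed audio via AcceptWaveform:

using SherpaOnnx;

static void DrainStream(KeywordSpotter kws, OnlineStream s)
{
    // Decode whatever is buffered and reset as soon as a keyword fires,
    // so the same keyword is not reported again for the following frames.
    while (kws.IsReady(s))
    {
        kws.Decode(s);
        var result = kws.GetResult(s);
        if (result.Keyword != string.Empty)
        {
            kws.Reset(s); // clear the detection state before continuing
            Console.WriteLine($"Detected: {result.Keyword}");
        }
    }
}

The helper name DrainStream is only illustrative; the point is that Decode, GetResult, and Reset belong inside the same IsReady loop.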
diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
index 992f8e0e34..21b9d3ea56 100644
--- a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
+++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>keyword_spotting_from_files</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/keyword-spotting-from-microphone/Program.cs b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
index cb0c922f49..140e6a40e5 100644
--- a/dotnet-examples/keyword-spotting-from-microphone/Program.cs
+++ b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
@@ -12,12 +12,9 @@
//
// dotnet run
+using PortAudioSharp;
using SherpaOnnx;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System;
-
-using PortAudioSharp;
class KeywordSpotterDemo
{
@@ -41,11 +38,11 @@ static void Main(string[] args)
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------");
- OnlineStream s = kws.CreateStream();
+ var s = kws.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -54,7 +51,7 @@ static void Main(string[] args)
for (int i = 0; i != PortAudio.DeviceCount; ++i)
{
Console.WriteLine($" Device {i}");
- DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
+ var deviceInfo = PortAudio.GetDeviceInfo(i);
Console.WriteLine($" Name: {deviceInfo.name}");
Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
@@ -66,12 +63,12 @@ static void Main(string[] args)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -79,21 +76,21 @@ static void Main(string[] args)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
- UInt32 frameCount,
+ uint frameCount,
ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags,
IntPtr userData
) =>
{
- float[] samples = new float[frameCount];
- Marshal.Copy(input, samples, 0, (Int32)frameCount);
+ var samples = new float[frameCount];
+ Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
return StreamCallbackResult.Continue;
};
- PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
+ var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
framesPerBuffer: 0,
streamFlags: StreamFlags.ClipOff,
callback: callback,
@@ -110,18 +107,19 @@ IntPtr userData
while (kws.IsReady(s))
{
kws.Decode(s);
- }
- var result = kws.GetResult(s);
- if (result.Keyword != "")
- {
- Console.WriteLine("Detected: {0}", result.Keyword);
+ var result = kws.GetResult(s);
+ if (result.Keyword != string.Empty)
+ {
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
+
+ Console.WriteLine("Detected: {0}", result.Keyword);
+ }
}
Thread.Sleep(200); // ms
}
-
- PortAudio.Terminate();
}
}
diff --git a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
index b3afae784d..12415b81bc 100644
--- a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
+++ b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/kokoro-tts-play/Program.cs b/dotnet-examples/kokoro-tts-play/Program.cs
new file mode 100644
index 0000000000..eea22cc2fd
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/Program.cs
@@ -0,0 +1,189 @@
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file shows how to use a non-streaming Kokoro TTS model
+// for text-to-speech
+// Please refer to
+// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+// and
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+// to download pre-trained models
+using PortAudioSharp;
+using SherpaOnnx;
+using System.Collections.Concurrent;
+using System.Runtime.InteropServices;
+
+class OfflineTtsDemo
+{
+ static void Main(string[] args)
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
+ "does not have two-thirds of his day for himself, is a slave, whatever " +
+ "he may be: a statesman, a businessman, an official, or a scholar. " +
+ "Friends fell out often because life was changing so fast. The easiest " +
+ "thing in the world was to lose touch with someone.";
+
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ var sid = 0;
+
+
+ Console.WriteLine(PortAudio.VersionInfo.versionText);
+ PortAudio.Initialize();
+ Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
+
+ for (int i = 0; i != PortAudio.DeviceCount; ++i)
+ {
+ Console.WriteLine($" Device {i}");
+ DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
+ Console.WriteLine($" Name: {deviceInfo.name}");
+ Console.WriteLine($" Max output channels: {deviceInfo.maxOutputChannels}");
+ Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
+ }
+ int deviceIndex = PortAudio.DefaultOutputDevice;
+ if (deviceIndex == PortAudio.NoDevice)
+ {
+ Console.WriteLine("No default output device found. Please use ../offline-tts instead");
+ Environment.Exit(1);
+ }
+
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
+ Console.WriteLine();
+ Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
+
+ var param = new StreamParameters();
+ param.device = deviceIndex;
+ param.channelCount = 1;
+ param.sampleFormat = SampleFormat.Float32;
+ param.suggestedLatency = info.defaultLowOutputLatency;
+ param.hostApiSpecificStreamInfo = IntPtr.Zero;
+
+ // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
+ var dataItems = new BlockingCollection<float[]>();
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ Console.WriteLine($"Progress {progress*100}%");
+
+ float[] data = new float[n];
+
+ Marshal.Copy(samples, data, 0, n);
+
+ dataItems.Add(data);
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var playFinished = false;
+
+ float[]? lastSampleArray = null;
+ int lastIndex = 0; // not played
+
+ PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
+ UInt32 frameCount,
+ ref StreamCallbackTimeInfo timeInfo,
+ StreamCallbackFlags statusFlags,
+ IntPtr userData
+ ) =>
+ {
+ if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
+ {
+ Console.WriteLine($"Finished playing");
+ playFinished = true;
+ return StreamCallbackResult.Complete;
+ }
+
+ int expected = Convert.ToInt32(frameCount);
+ int i = 0;
+
+ while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
+ {
+ int needed = expected - i;
+
+ if (lastSampleArray != null)
+ {
+ int remaining = lastSampleArray.Length - lastIndex;
+ if (remaining >= needed)
+ {
+ float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
+ lastIndex += needed;
+ if (lastIndex == lastSampleArray.Length)
+ {
+ lastSampleArray = null;
+ lastIndex = 0;
+ }
+
+ Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
+ return StreamCallbackResult.Continue;
+ }
+
+ float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
+ lastIndex = 0;
+ lastSampleArray = null;
+
+ Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
+ i += remaining;
+ continue;
+ }
+
+ if (dataItems.Count != 0)
+ {
+ lastSampleArray = dataItems.Take();
+ lastIndex = 0;
+ }
+ }
+
+ if (i < expected)
+ {
+ int sizeInBytes = (expected - i) * 4;
+ Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
+ }
+
+ return StreamCallbackResult.Continue;
+ };
+
+ PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
+ framesPerBuffer: 0,
+ streamFlags: StreamFlags.ClipOff,
+ callback: playCallback,
+ userData: IntPtr.Zero
+ );
+
+ stream.Start();
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+ var outputFilename = "./generated-kokoro-0.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ dataItems.CompleteAdding();
+
+ while (!playFinished)
+ {
+ Thread.Sleep(100); // 100ms
+ }
+ }
+}
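The MyCallback lambda above is the producer half of the pipeline: each generated chunk is copied out of native memory, queued on dataItems for the PortAudio playback callback to consume, and the integer return value tells the engine whether to continue. A minimal sketch of a variant that stops generation early, assuming the same delegate shape accepted by OfflineTtsCallbackProgress; the name earlyStopCallback and the 50% cut-off are purely illustrative:

// Hypothetical early-stop variant of the progress callback shown above.
var earlyStopCallback = (IntPtr samples, int n, float progress) =>
{
    var data = new float[n];
    Marshal.Copy(samples, data, 0, n); // copy out of native memory
    dataItems.Add(data);               // hand the chunk to the playback side

    // 1 keeps generation going, 0 stops it; here we stop once half of the
    // text has been synthesized.
    return progress < 0.5f ? 1 : 0;
};
var callback = new OfflineTtsCallbackProgress(earlyStopCallback);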
diff --git a/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj b/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
new file mode 100644
index 0000000000..6c725686cf
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net8.0
+ kokoro_tts_play
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh b/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
new file mode 100755
index 0000000000..08bdc693a6
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+set -ex
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/kokoro-tts/Program.cs b/dotnet-examples/kokoro-tts/Program.cs
new file mode 100644
index 0000000000..b72f98fcbc
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/Program.cs
@@ -0,0 +1,127 @@
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file shows how to use a non-streaming Kokoro TTS model
+// for text-to-speech
+// Please refer to
+// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+// and
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+// to download pre-trained models
+using SherpaOnnx;
+using System.Runtime.InteropServices;
+
+class OfflineTtsDemo
+{
+ static void Main(string[] args)
+ {
+
+ TestZhEn();
+ TestEn();
+ }
+
+ static void TestZhEn()
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.Model.Kokoro.DictDir = "./kokoro-multi-lang-v1_0/dict";
+ config.Model.Kokoro.Lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ var sid = 50;
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ float[] data = new float[n];
+ Marshal.Copy(samples, data, 0, n);
+ // You can process samples here, e.g., play them.
+ // See ../kokoro-tts-play for how to play them
+ Console.WriteLine($"Progress {progress*100}%");
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+
+ var outputFilename = "./generated-kokoro-zh-en.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ }
+
+ static void TestEn()
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
+ "does not have two-thirds of his day for himself, is a slave, whatever " +
+ "he may be: a statesman, a businessman, an official, or a scholar. " +
+ "Friends fell out often because life was changing so fast. The easiest " +
+ "thing in the world was to lose touch with someone.";
+
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ var sid = 0;
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ float[] data = new float[n];
+ Marshal.Copy(samples, data, 0, n);
+ // You can process samples here, e.g., play them.
+ // See ../kokoro-tts-play for how to play them
+ Console.WriteLine($"Progress {progress*100}%");
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+
+ var outputFilename = "./generated-kokoro-en.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ }
+}
+
diff --git a/dotnet-examples/kokoro-tts/kokoro-tts.csproj b/dotnet-examples/kokoro-tts/kokoro-tts.csproj
new file mode 100644
index 0000000000..132819c6fd
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/kokoro-tts.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ kokoro_tts
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/dotnet-examples/kokoro-tts/run-kokoro.sh b/dotnet-examples/kokoro-tts/run-kokoro.sh
new file mode 100755
index 0000000000..117a2fc1bc
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/run-kokoro.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -ex
+
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs
index d971becd3e..0d944e5a39 100644
--- a/dotnet-examples/offline-decode-files/Program.cs
+++ b/dotnet-examples/offline-decode-files/Program.cs
@@ -5,64 +5,73 @@
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download non-streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflineDecodeFiles
{
class Options
{
-
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
- public int SampleRate { get; set; } = 16000;
+ public int SampleRate { get; set; } = 16000;
[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
public int FeatureDim { get; set; } = 80;
[Option(Required = false, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
- public string Encoder { get; set; } = "";
+ public string Encoder { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
- public string Decoder { get; set; } = "";
+ public string Decoder { get; set; } = string.Empty;
- [Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
- public string Joiner { get; set; } = "";
+ [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
+ public string Joiner { get; set; } = string.Empty;
[Option("model-type", Required = false, Default = "", HelpText = "model type")]
- public string ModelType { get; set; } = "";
+ public string ModelType { get; set; } = string.Empty;
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
- public string WhisperEncoder { get; set; } = "";
+ public string WhisperEncoder { get; set; } = string.Empty;
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
- public string WhisperDecoder { get; set; } = "";
+ public string WhisperDecoder { get; set; } = string.Empty;
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
- public string WhisperLanguage{ get; set; } = "";
+ public string WhisperLanguage { get; set; } = string.Empty;
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
- public string WhisperTask{ get; set; } = "transcribe";
+ public string WhisperTask { get; set; } = "transcribe";
+
+ [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
+ public string MoonshinePreprocessor { get; set; } = string.Empty;
+
+ [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
+ public string MoonshineEncoder { get; set; } = string.Empty;
+
+ [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
+ public string MoonshineUncachedDecoder { get; set; } = string.Empty;
+
+ [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
+ public string MoonshineCachedDecoder { get; set; } = string.Empty;
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
- public string TdnnModel { get; set; } = "";
+ public string TdnnModel { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
- public string Paraformer { get; set; } = "";
+ public string Paraformer { get; set; } = string.Empty;
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
- public string NeMoCtc { get; set; } = "";
+ public string NeMoCtc { get; set; } = string.Empty;
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
- public string TeleSpeechCtc { get; set; } = "";
+ public string TeleSpeechCtc { get; set; } = string.Empty;
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
- public string SenseVoiceModel { get; set; } = "";
+ public string SenseVoiceModel { get; set; } = string.Empty;
[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
public int SenseVoiceUseItn { get; set; } = 1;
@@ -76,7 +85,7 @@ class Options
[Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")]
- public string RuleFsts { get; set; } = "";
+ public string RuleFsts { get; set; } = string.Empty;
[Option("max-active-paths", Required = false, Default = 4,
HelpText = @"Used only when --decoding--method is modified_beam_search.
@@ -84,13 +93,13 @@ class Options
public int MaxActivePaths { get; set; } = 4;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
- public string HotwordsFile { get; set; } = "";
+ public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F;
[Option("files", Required = true, HelpText = "Audio files for decoding")]
- public IEnumerable<string> Files { get; set; } = new string[] {};
+ public IEnumerable<string> Files { get; set; } = new string[] { };
}
static void Main(string[] args)
@@ -105,7 +114,7 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
- string usage = @"
+ var usage = @"
# Zipformer
dotnet run \
@@ -201,41 +210,48 @@ private static void Run(Options options)
config.ModelConfig.Tokens = options.Tokens;
- if (!String.IsNullOrEmpty(options.Encoder))
+ if (!string.IsNullOrEmpty(options.Encoder))
{
// this is a transducer model
config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner;
}
- else if (!String.IsNullOrEmpty(options.Paraformer))
+ else if (!string.IsNullOrEmpty(options.Paraformer))
{
config.ModelConfig.Paraformer.Model = options.Paraformer;
}
- else if (!String.IsNullOrEmpty(options.NeMoCtc))
+ else if (!string.IsNullOrEmpty(options.NeMoCtc))
{
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
}
- else if (!String.IsNullOrEmpty(options.TeleSpeechCtc))
+ else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
{
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
}
- else if (!String.IsNullOrEmpty(options.WhisperEncoder))
+ else if (!string.IsNullOrEmpty(options.WhisperEncoder))
{
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
config.ModelConfig.Whisper.Language = options.WhisperLanguage;
config.ModelConfig.Whisper.Task = options.WhisperTask;
}
- else if (!String.IsNullOrEmpty(options.TdnnModel))
+ else if (!string.IsNullOrEmpty(options.TdnnModel))
{
config.ModelConfig.Tdnn.Model = options.TdnnModel;
}
- else if (!String.IsNullOrEmpty(options.SenseVoiceModel))
+ else if (!string.IsNullOrEmpty(options.SenseVoiceModel))
{
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
}
+ else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor))
+ {
+ config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
+ config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
+ config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
+ config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
+ }
else
{
Console.WriteLine("Please provide a model");
@@ -251,17 +267,17 @@ private static void Run(Options options)
config.ModelConfig.Debug = 0;
- OfflineRecognizer recognizer = new OfflineRecognizer(config);
+ var recognizer = new OfflineRecognizer(config);
- string[] files = options.Files.ToArray();
+ var files = options.Files.ToArray();
// We create a separate stream for each file
- List<OfflineStream> streams = new List<OfflineStream>();
+ var streams = new List<OfflineStream>();
streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i)
{
- OfflineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
WaveReader waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
@@ -273,10 +289,21 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
- var text = streams[i].Result.Text;
+ var r = streams[i].Result;
Console.WriteLine("--------------------");
Console.WriteLine(files[i]);
- Console.WriteLine(text);
+ Console.WriteLine("Text: {0}", r.Text);
+ Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
+ if (r.Timestamps != null && r.Timestamps.Length > 0) {
+ Console.Write("Timestamps: [");
+ var sep = string.Empty;
+ for (int k = 0; k != r.Timestamps.Length; ++k)
+ {
+ Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
+ sep = ", ";
+ }
+ Console.WriteLine("]");
+ }
}
Console.WriteLine("--------------------");
}
diff --git a/dotnet-examples/offline-decode-files/offline-decode-files.csproj b/dotnet-examples/offline-decode-files/offline-decode-files.csproj
index ffdfb6acee..5b28d48b72 100644
--- a/dotnet-examples/offline-decode-files/offline-decode-files.csproj
+++ b/dotnet-examples/offline-decode-files/offline-decode-files.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_decode_files</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-decode-files/run-moonshine.sh b/dotnet-examples/offline-decode-files/run-moonshine.sh
new file mode 100755
index 0000000000..025e0902db
--- /dev/null
+++ b/dotnet-examples/offline-decode-files/run-moonshine.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+dotnet run \
+ --num-threads=2 \
+ --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
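The new Moonshine command-line options map one-to-one onto the Moonshine block of the recognizer's model config, as the branch added to Program.cs above shows. A minimal sketch of wiring it up directly in C#, assuming the config type is OfflineRecognizerConfig and that a single-stream Decode overload exists as in the other offline examples; the file paths are the ones downloaded by run-moonshine.sh:

using SherpaOnnx;

var config = new OfflineRecognizerConfig();
config.ModelConfig.Moonshine.Preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
config.ModelConfig.Moonshine.Encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
config.ModelConfig.Moonshine.UncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
config.ModelConfig.Moonshine.CachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

var recognizer = new OfflineRecognizer(config);
var stream = recognizer.CreateStream();

// Feed the whole file at once; Moonshine is a non-streaming model.
var waveReader = new WaveReader("./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav");
stream.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
recognizer.Decode(stream);
Console.WriteLine(stream.Result.Text);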
diff --git a/dotnet-examples/offline-punctuation/Program.cs b/dotnet-examples/offline-punctuation/Program.cs
index d299f8abcf..6f85237b64 100644
--- a/dotnet-examples/offline-punctuation/Program.cs
+++ b/dotnet-examples/offline-punctuation/Program.cs
@@ -12,8 +12,6 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflinePunctuationDemo
{
@@ -25,14 +23,14 @@ static void Main(string[] args)
config.Model.NumThreads = 1;
var punct = new OfflinePunctuation(config);
- string[] textList = new string[] {
+ var textList = new string[] {
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
"我们都是木头人不会说话不会动",
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
};
Console.WriteLine("---------");
- foreach (string text in textList)
+ foreach (var text in textList)
{
string textWithPunct = punct.AddPunct(text);
Console.WriteLine("Input text: {0}", text);
diff --git a/dotnet-examples/offline-punctuation/offline-punctuation.csproj b/dotnet-examples/offline-punctuation/offline-punctuation.csproj
index 2d94fcb384..0e3ee42a9a 100644
--- a/dotnet-examples/offline-punctuation/offline-punctuation.csproj
+++ b/dotnet-examples/offline-punctuation/offline-punctuation.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_punctuation</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-speaker-diarization/Program.cs b/dotnet-examples/offline-speaker-diarization/Program.cs
new file mode 100644
index 0000000000..4d8d91b0e5
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/Program.cs
@@ -0,0 +1,82 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+
+// This file shows how to use sherpa-onnx C# API for speaker diarization
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+
+ dotnet run
+*/
+
+using SherpaOnnx;
+
+class OfflineSpeakerDiarizationDemo
+{
+ static void Main(string[] args)
+ {
+ var config = new OfflineSpeakerDiarizationConfig();
+ config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+ config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
+ // we set num_clusters to 4
+ //
+ config.Clustering.NumClusters = 4;
+ // If you don't know the number of speakers in the test wave file, please
+ // use
+ // config.Clustering.Threshold = 0.5; // You need to tune this threshold
+ var sd = new OfflineSpeakerDiarization(config);
+
+ var testWaveFile = "./0-four-speakers-zh.wav";
+ var waveReader = new WaveReader(testWaveFile);
+ if (sd.SampleRate != waveReader.SampleRate)
+ {
+ Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
+ return;
+ }
+
+ Console.WriteLine("Started");
+
+ // var segments = sd.Process(waveReader.Samples); // this one is also ok
+
+ var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
+ {
+ var progress = 100.0F * numProcessedChunks / numTotalChunks;
+ Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress));
+ return 0;
+ };
+
+ var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback);
+ var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);
+
+ foreach (var s in segments)
+ {
+ Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker);
+ }
+ }
+}
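As the comment above notes, when the number of speakers is not known in advance the fixed NumClusters can be replaced by a distance threshold. A minimal sketch of that variant, using only the types and fields already shown in this example; the 0.5 value is a starting point that typically needs tuning per embedding model:

using SherpaOnnx;

var config = new OfflineSpeakerDiarizationConfig();
config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

// Cluster by threshold instead of a known speaker count. A larger threshold
// merges embeddings more aggressively and tends to yield fewer speakers.
config.Clustering.Threshold = 0.5f;

var sd = new OfflineSpeakerDiarization(config);
var waveReader = new WaveReader("./0-four-speakers-zh.wav");
foreach (var s in sd.Process(waveReader.Samples))
{
    Console.WriteLine("{0:0.00} -- {1:0.00} speaker_{2}", s.Start, s.End, s.Speaker);
}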
diff --git a/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
new file mode 100644
index 0000000000..c7b15faa53
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ offline_speaker_diarization
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/dotnet-examples/offline-speaker-diarization/run.sh b/dotnet-examples/offline-speaker-diarization/run.sh
new file mode 100755
index 0000000000..fe64412f96
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/run.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+dotnet run
diff --git a/dotnet-examples/offline-tts-play/Program.cs b/dotnet-examples/offline-tts-play/Program.cs
index a142c127eb..543a50cdd6 100644
--- a/dotnet-examples/offline-tts-play/Program.cs
+++ b/dotnet-examples/offline-tts-play/Program.cs
@@ -10,62 +10,67 @@
// Note that you need a speaker to run this file since it will play
// the generated audio as it is generating.
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System.Threading;
-using System;
class OfflineTtsPlayDemo
{
class Options
{
-
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
- public string RuleFsts { get; set; }
+ public string RuleFsts { get; set; } = string.Empty;
- [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
- public string DictDir { get; set; }
+ [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
+ public string RuleFars { get; set; } = string.Empty;
- [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
- public string DataDir { get; set; }
+ [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
+ public string DictDir { get; set; } = string.Empty;
- [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
- public float LengthScale { get; set; }
+ [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
+ public string DataDir { get; set; } = string.Empty;
- [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
- public float NoiseScale { get; set; }
+ [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
+ public float LengthScale { get; set; } = 1;
- [Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")]
- public float NoiseScaleW { get; set; }
+ [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
+ public float NoiseScale { get; set; } = 0.667F;
- [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
- public string Lexicon { get; set; }
+ [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
+ public float NoiseScaleW { get; set; } = 0.8F;
- [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; }
+ [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
+ public string Lexicon { get; set; } = string.Empty;
+
+ [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
+ public string Tokens { get; set; } = string.Empty;
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
- public int MaxNumSentences { get; set; }
+ public int MaxNumSentences { get; set; } = 1;
[Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
- public int Debug { get; set; }
+ public int Debug { get; set; } = 0;
+
+ [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
+ public string Model { get; set; } = string.Empty;
+
+ [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
+ public string AcousticModel { get; set; } = string.Empty;
- [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
- public string Model { get; set; }
+ [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
+ public string Vocoder { get; set; } = string.Empty;
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
- public int SpeakerId { get; set; }
+ public int SpeakerId { get; set; } = 0;
[Option("text", Required = true, HelpText = "Text to synthesize")]
- public string Text { get; set; }
+ public string Text { get; set; } = string.Empty;
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
- public string OutputFilename { get; set; }
+ public string OutputFilename { get; set; } = "./generated.wav";
}
static void Main(string[] args)
@@ -81,6 +86,42 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
string usage = @"
+# matcha-icefall-zh-baker
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
+
+# matcha-icefall-en_US-ljspeech
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
+
# vits-aishell3
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
@@ -88,8 +129,8 @@ tar xf vits-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
- --vits-tokens=./vits-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
+ --tokens=./vits-zh-aishell3/tokens.txt \
+ --lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--sid=66 \
--debug=1 \
@@ -103,8 +144,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -124,10 +165,10 @@ to download more models.
Console.WriteLine(helpText);
}
-
private static void Run(Options options)
{
- OfflineTtsConfig config = new OfflineTtsConfig();
+ var config = new OfflineTtsConfig();
+
config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
@@ -136,16 +177,25 @@ private static void Run(Options options)
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
+
+ config.Model.Matcha.AcousticModel = options.AcousticModel;
+ config.Model.Matcha.Vocoder = options.Vocoder;
+ config.Model.Matcha.Lexicon = options.Lexicon;
+ config.Model.Matcha.Tokens = options.Tokens;
+ config.Model.Matcha.DataDir = options.DataDir;
+ config.Model.Matcha.DictDir = options.DictDir;
+ config.Model.Matcha.NoiseScale = options.NoiseScale;
+ config.Model.Matcha.LengthScale = options.LengthScale;
+
config.Model.NumThreads = 1;
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
config.RuleFsts = options.RuleFsts;
config.MaxNumSentences = options.MaxNumSentences;
- OfflineTts tts = new OfflineTts(config);
- float speed = 1.0f / options.LengthScale;
- int sid = options.SpeakerId;
-
+ var tts = new OfflineTts(config);
+ var speed = 1.0f / options.LengthScale;
+ var sid = options.SpeakerId;
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -166,11 +216,11 @@ private static void Run(Options options)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -178,7 +228,7 @@ private static void Run(Options options)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
- BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>();
+ var dataItems = new BlockingCollection<float[]>();
var MyCallback = (IntPtr samples, int n) =>
{
@@ -193,9 +243,9 @@ private static void Run(Options options)
return 1;
};
- bool playFinished = false;
+ var playFinished = false;
- float[] lastSampleArray = null;
+ float[]? lastSampleArray = null;
int lastIndex = 0; // not played
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
@@ -270,10 +320,10 @@ IntPtr userData
stream.Start();
- OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback);
+ var callback = new OfflineTtsCallback(MyCallback);
- OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
- bool ok = audio.SaveToWaveFile(options.OutputFilename);
+ var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
+ var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok)
{
diff --git a/dotnet-examples/offline-tts-play/offline-tts-play.csproj b/dotnet-examples/offline-tts-play/offline-tts-play.csproj
index d28ae62c87..b777bcafe0 100644
--- a/dotnet-examples/offline-tts-play/offline-tts-play.csproj
+++ b/dotnet-examples/offline-tts-play/offline-tts-play.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_tts_play</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-tts-play/run-hf-fanchen.sh b/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
index b16a3ca686..84e6685788 100755
--- a/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
+++ b/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
@@ -8,8 +8,8 @@ fi
dotnet run \
--vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
- --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
- --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
+ --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
+ --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
--tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
--vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
--sid=100 \
diff --git a/dotnet-examples/offline-tts-play/run-matcha-en.sh b/dotnet-examples/offline-tts-play/run-matcha-en.sh
new file mode 100755
index 0000000000..0f7caa2154
--- /dev/null
+++ b/dotnet-examples/offline-tts-play/run-matcha-en.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -ex
+
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
diff --git a/dotnet-examples/offline-tts-play/run-matcha-zh.sh b/dotnet-examples/offline-tts-play/run-matcha-zh.sh
new file mode 100755
index 0000000000..e3b34268c7
--- /dev/null
+++ b/dotnet-examples/offline-tts-play/run-matcha-zh.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
diff --git a/dotnet-examples/offline-tts-play/run-piper.sh b/dotnet-examples/offline-tts-play/run-piper.sh
index 7c97498d21..1a4d108069 100755
--- a/dotnet-examples/offline-tts-play/run-piper.sh
+++ b/dotnet-examples/offline-tts-play/run-piper.sh
@@ -9,8 +9,8 @@ fi
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text="This is a text to speech application in dotnet with Next Generation Kaldi"
diff --git a/dotnet-examples/offline-tts/Program.cs b/dotnet-examples/offline-tts/Program.cs
index 6216095f4f..21f90c525d 100644
--- a/dotnet-examples/offline-tts/Program.cs
+++ b/dotnet-examples/offline-tts/Program.cs
@@ -6,43 +6,40 @@
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflineTtsDemo
{
class Options
{
-
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
- public string RuleFsts { get; set; } = "";
+ public string RuleFsts { get; set; } = string.Empty;
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
- public string RuleFars { get; set; } = "";
+ public string RuleFars { get; set; } = string.Empty;
- [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
- public string DictDir { get; set; } = "";
+ [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
+ public string DictDir { get; set; } = string.Empty;
- [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
- public string DataDir { get; set; } = "";
+ [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
+ public string DataDir { get; set; } = string.Empty;
- [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
+ [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
public float LengthScale { get; set; } = 1;
- [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
+ [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
public float NoiseScale { get; set; } = 0.667F;
[Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
public float NoiseScaleW { get; set; } = 0.8F;
- [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
- public string Lexicon { get; set; } = "";
+ [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
+ public string Lexicon { get; set; } = string.Empty;
- [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
+ public string Tokens { get; set; } = string.Empty;
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
public int MaxNumSentences { get; set; } = 1;
@@ -50,14 +47,20 @@ class Options
[Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
public int Debug { get; set; } = 0;
- [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
- public string Model { get; set; } = "";
+ [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
+ public string Model { get; set; } = string.Empty;
+
+ [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
+ public string AcousticModel { get; set; } = string.Empty;
+
+ [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
+ public string Vocoder { get; set; } = string.Empty;
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
public int SpeakerId { get; set; } = 0;
[Option("text", Required = true, HelpText = "Text to synthesize")]
- public string Text { get; set; } = "";
+ public string Text { get; set; } = string.Empty;
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
public string OutputFilename { get; set; } = "./generated.wav";
@@ -65,7 +68,7 @@ class Options
static void Main(string[] args)
{
- var parser = new CommandLine.Parser(with => with.HelpWriter = null);
+ var parser = new Parser(with => with.HelpWriter = null);
var parserResult = parser.ParseArguments<Options>(args);
parserResult
@@ -75,7 +78,43 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
- string usage = @"
+ var usage = @"
+# matcha-icefall-zh-baker
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
+
+# matcha-icefall-en_US-ljspeech
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
+
# vits-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
@@ -83,8 +122,8 @@ tar xvf vits-icefall-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
- --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
+ --tokens=./vits-icefall-zh-aishell3/tokens.txt \
+ --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
@@ -99,8 +138,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -122,7 +161,7 @@ to download more models.
private static void Run(Options options)
{
- OfflineTtsConfig config = new OfflineTtsConfig();
+ var config = new OfflineTtsConfig();
config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
@@ -131,6 +170,16 @@ private static void Run(Options options)
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
+
+ config.Model.Matcha.AcousticModel = options.AcousticModel;
+ config.Model.Matcha.Vocoder = options.Vocoder;
+ config.Model.Matcha.Lexicon = options.Lexicon;
+ config.Model.Matcha.Tokens = options.Tokens;
+ config.Model.Matcha.DataDir = options.DataDir;
+ config.Model.Matcha.DictDir = options.DictDir;
+ config.Model.Matcha.NoiseScale = options.NoiseScale;
+ config.Model.Matcha.LengthScale = options.LengthScale;
+
config.Model.NumThreads = 1;
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
@@ -138,11 +187,11 @@ private static void Run(Options options)
config.RuleFars = options.RuleFars;
config.MaxNumSentences = options.MaxNumSentences;
- OfflineTts tts = new OfflineTts(config);
- float speed = 1.0f / options.LengthScale;
- int sid = options.SpeakerId;
- OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid);
- bool ok = audio.SaveToWaveFile(options.OutputFilename);
+ var tts = new OfflineTts(config);
+ var speed = 1.0f / options.LengthScale;
+ var sid = options.SpeakerId;
+ var audio = tts.Generate(options.Text, speed, sid);
+ var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok)
{
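
The Program.cs hunks above make `--vits-model` optional, add `--matcha-acoustic-model`/`--matcha-vocoder`, and fill both `config.Model.Vits` and `config.Model.Matcha` from the same options (the library presumably picks the back end from whichever model path is non-empty). Below is a minimal Matcha-only sketch distilled from those hunks; it is not part of the patch, the paths come from the usage string above, and `MatchaTtsSketch` is an illustrative name.

```csharp
// Minimal Matcha sketch distilled from the hunks above; not part of the patch.
// Every config member used here appears in this diff; file paths follow the
// matcha-icefall-zh-baker commands from the usage string.
using System;
using SherpaOnnx;

class MatchaTtsSketch
{
    static void Main()
    {
        var config = new OfflineTtsConfig();
        config.Model.Matcha.AcousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
        config.Model.Matcha.Vocoder = "./hifigan_v2.onnx";
        config.Model.Matcha.Lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
        config.Model.Matcha.Tokens = "./matcha-icefall-zh-baker/tokens.txt";
        config.Model.Matcha.DictDir = "./matcha-icefall-zh-baker/dict";
        config.Model.NumThreads = 1;
        config.Model.Provider = "cpu";
        config.RuleFsts = "./matcha-icefall-zh-baker/phone.fst," +
                          "./matcha-icefall-zh-baker/date.fst," +
                          "./matcha-icefall-zh-baker/number.fst";
        config.MaxNumSentences = 1;

        var tts = new OfflineTts(config);
        // Generate(text, speed, speakerId); speed = 1 / length-scale, as in Run() above.
        var audio = tts.Generate("经济不断增长。", 1.0f, 0);
        var ok = audio.SaveToWaveFile("./matcha-zh.wav");
        Console.WriteLine(ok ? "Saved to ./matcha-zh.wav" : "Failed to save audio");
    }
}
```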
diff --git a/dotnet-examples/offline-tts/offline-tts.csproj b/dotnet-examples/offline-tts/offline-tts.csproj
index 48548fc4c5..20b048f198 100644
--- a/dotnet-examples/offline-tts/offline-tts.csproj
+++ b/dotnet-examples/offline-tts/offline-tts.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
offline_tts
enable
enable
diff --git a/dotnet-examples/offline-tts/run-aishell3.sh b/dotnet-examples/offline-tts/run-aishell3.sh
index 02380f07c2..9a54df349d 100755
--- a/dotnet-examples/offline-tts/run-aishell3.sh
+++ b/dotnet-examples/offline-tts/run-aishell3.sh
@@ -8,8 +8,8 @@ fi
dotnet run \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
- --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
+ --tokens=./vits-icefall-zh-aishell3/tokens.txt \
+ --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
diff --git a/dotnet-examples/offline-tts/run-hf-fanchen.sh b/dotnet-examples/offline-tts/run-hf-fanchen.sh
index b16a3ca686..a7a52e7330 100755
--- a/dotnet-examples/offline-tts/run-hf-fanchen.sh
+++ b/dotnet-examples/offline-tts/run-hf-fanchen.sh
@@ -8,10 +8,10 @@ fi
dotnet run \
--vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
- --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
- --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
+ --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
+ --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
--tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
- --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
+ --dict-dir=./vits-zh-hf-fanchen-C/dict \
--sid=100 \
--debug=1 \
--output-filename=./fanchen-100.wav \
diff --git a/dotnet-examples/offline-tts/run-matcha-en.sh b/dotnet-examples/offline-tts/run-matcha-en.sh
new file mode 100755
index 0000000000..0f7caa2154
--- /dev/null
+++ b/dotnet-examples/offline-tts/run-matcha-en.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+set -ex
+
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
diff --git a/dotnet-examples/offline-tts/run-matcha-zh.sh b/dotnet-examples/offline-tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..e3b34268c7
--- /dev/null
+++ b/dotnet-examples/offline-tts/run-matcha-zh.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
diff --git a/dotnet-examples/offline-tts/run-piper.sh b/dotnet-examples/offline-tts/run-piper.sh
index ff639c5707..273799bb3b 100755
--- a/dotnet-examples/offline-tts/run-piper.sh
+++ b/dotnet-examples/offline-tts/run-piper.sh
@@ -10,8 +10,8 @@ fi
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text="This is a text to speech application in dotnet with Next Generation Kaldi"
diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs
index ad53624deb..a1f01be57e 100644
--- a/dotnet-examples/online-decode-files/Program.cs
+++ b/dotnet-examples/online-decode-files/Program.cs
@@ -6,40 +6,37 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System.Linq;
-using System;
class OnlineDecodeFiles
{
class Options
{
[Option(Required = true, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
- public string Provider { get; set; } = "";
+ public string Provider { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
- public string Encoder { get; set; } = "";
+ public string Encoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
- public string Decoder { get; set; } = "";
+ public string Decoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
- public string Joiner { get; set; } = "";
+ public string Joiner { get; set; } = string.Empty;
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
- public string ParaformerEncoder { get; set; } = "";
+ public string ParaformerEncoder { get; set; } = string.Empty;
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
- public string ParaformerDecoder { get; set; } = "";
+ public string ParaformerDecoder { get; set; } = string.Empty;
[Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
- public string Zipformer2Ctc { get; set; } = "";
+ public string Zipformer2Ctc { get; set; } = string.Empty;
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; } = 1;
@@ -80,15 +77,14 @@ larger than this value after something that is not blank has been decoded. Used
public float Rule3MinUtteranceLength { get; set; } = 20.0F;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
- public string HotwordsFile { get; set; } = "";
+ public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F;
[Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")]
- public string RuleFsts { get; set; } = "";
-
+ public string RuleFsts { get; set; } = string.Empty;
[Option("files", Required = true, HelpText = "Audio files for decoding")]
public IEnumerable<string> Files { get; set; } = new string[] {};
@@ -162,7 +158,7 @@ to download pre-trained streaming models.
private static void Run(Options options)
{
- OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80.
@@ -194,22 +190,22 @@ private static void Run(Options options)
config.HotwordsScore = options.HotwordsScore;
config.RuleFsts = options.RuleFsts;
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
- string[] files = options.Files.ToArray();
+ var files = options.Files.ToArray();
// We create a separate stream for each file
- List<OnlineStream> streams = new List<OnlineStream>();
+ var streams = new List<OnlineStream>();
streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i)
{
- OnlineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
- WaveReader waveReader = new WaveReader(files[i]);
+ var waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
- float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
+ var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished();
@@ -230,7 +226,7 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
- OnlineRecognizerResult r = recognizer.GetResult(streams[i]);
+ var r = recognizer.GetResult(streams[i]);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
@@ -238,7 +234,7 @@ private static void Run(Options options)
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
- r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
+ r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]");
}
Console.WriteLine("--------------------");
diff --git a/dotnet-examples/online-decode-files/online-decode-files.csproj b/dotnet-examples/online-decode-files/online-decode-files.csproj
index 0ff581102e..f1cc3baa72 100644
--- a/dotnet-examples/online-decode-files/online-decode-files.csproj
+++ b/dotnet-examples/online-decode-files/online-decode-files.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
online_decode_files
enable
enable
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln
index 397fe99e5d..404c49762e 100644
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -29,7 +29,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\kokoro-tts.csproj", "{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -89,10 +93,18 @@ Global
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.Build.0 = Release|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/dotnet-examples/speaker-identification/Program.cs b/dotnet-examples/speaker-identification/Program.cs
index aef53e8519..20ac703905 100644
--- a/dotnet-examples/speaker-identification/Program.cs
+++ b/dotnet-examples/speaker-identification/Program.cs
@@ -16,20 +16,18 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class SpeakerIdentificationDemo
{
- public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
+ public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
{
- WaveReader reader = new WaveReader(filename);
+ var reader = new WaveReader(filename);
- OnlineStream stream = extractor.CreateStream();
+ var stream = extractor.CreateStream();
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
stream.InputFinished();
- float[] embedding = extractor.Compute(stream);
+ var embedding = extractor.Compute(stream);
return embedding;
}
@@ -43,25 +41,25 @@ static void Main(string[] args)
var manager = new SpeakerEmbeddingManager(extractor.Dim);
- string[] spk1Files =
+ var spk1Files =
new string[] {
"./sr-data/enroll/fangjun-sr-1.wav",
"./sr-data/enroll/fangjun-sr-2.wav",
"./sr-data/enroll/fangjun-sr-3.wav",
};
- float[][] spk1Vec = new float[spk1Files.Length][];
+ var spk1Vec = new float[spk1Files.Length][];
for (int i = 0; i < spk1Files.Length; ++i)
{
spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
}
- string[] spk2Files =
+ var spk2Files =
new string[] {
"./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
};
- float[][] spk2Vec = new float[spk2Files.Length][];
+ var spk2Vec = new float[spk2Files.Length][];
for (int i = 0; i < spk2Files.Length; ++i)
{
@@ -100,14 +98,14 @@ static void Main(string[] args)
Console.WriteLine("---All speakers---");
- string[] allSpeakers = manager.GetAllSpeakers();
+ var allSpeakers = manager.GetAllSpeakers();
foreach (var s in allSpeakers)
{
Console.WriteLine(s);
}
Console.WriteLine("------------");
- string[] testFiles =
+ var testFiles =
new string[] {
"./sr-data/test/fangjun-test-sr-1.wav",
"./sr-data/test/leijun-test-sr-1.wav",
@@ -117,9 +115,9 @@ static void Main(string[] args)
float threshold = 0.6f;
foreach (var file in testFiles)
{
- float[] embedding = ComputeEmbedding(extractor, file);
+ var embedding = ComputeEmbedding(extractor, file);
- String name = manager.Search(embedding, threshold);
+ var name = manager.Search(embedding, threshold);
if (name == "")
{
name = "";
diff --git a/dotnet-examples/speaker-identification/speaker-identification.csproj b/dotnet-examples/speaker-identification/speaker-identification.csproj
index 7c857fa54c..45a42f49e6 100644
--- a/dotnet-examples/speaker-identification/speaker-identification.csproj
+++ b/dotnet-examples/speaker-identification/speaker-identification.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
speaker_identification
enable
enable
diff --git a/dotnet-examples/speech-recognition-from-microphone/Program.cs b/dotnet-examples/speech-recognition-from-microphone/Program.cs
index 586e3b1624..aa0e7803f4 100644
--- a/dotnet-examples/speech-recognition-from-microphone/Program.cs
+++ b/dotnet-examples/speech-recognition-from-microphone/Program.cs
@@ -6,47 +6,43 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using PortAudioSharp;
-using System.Threading;
using SherpaOnnx;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System;
-
class SpeechRecognitionFromMicrophone
{
class Options
{
[Option(Required = true, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; }
+ public string? Tokens { get; set; }
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
- public string Provider { get; set; }
+ public string? Provider { get; set; }
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
- public string Encoder { get; set; }
+ public string? Encoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
- public string Decoder { get; set; }
+ public string? Decoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
- public string Joiner { get; set; }
+ public string? Joiner { get; set; }
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
- public string ParaformerEncoder { get; set; }
+ public string? ParaformerEncoder { get; set; }
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
- public string ParaformerDecoder { get; set; }
+ public string? ParaformerDecoder { get; set; }
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; }
[Option("decoding-method", Required = false, Default = "greedy_search",
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
- public string DecodingMethod { get; set; }
+ public string? DecodingMethod { get; set; }
[Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
public bool Debug { get; set; }
@@ -126,7 +122,7 @@ to download pre-trained streaming models.
private static void Run(Options options)
{
- OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80.
@@ -153,9 +149,9 @@ private static void Run(Options options)
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
- OnlineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -176,12 +172,12 @@ private static void Run(Options options)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -189,14 +185,14 @@ private static void Run(Options options)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
- UInt32 frameCount,
+ uint frameCount,
ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags,
IntPtr userData
) =>
{
- float[] samples = new float[frameCount];
- Marshal.Copy(input, samples, 0, (Int32)frameCount);
+ var samples = new float[frameCount];
+ Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(options.SampleRate, samples);
@@ -215,7 +211,7 @@ IntPtr userData
stream.Start();
- String lastText = "";
+ var lastText = string.Empty;
int segmentIndex = 0;
while (true)
@@ -245,9 +241,5 @@ IntPtr userData
Thread.Sleep(200); // ms
}
-
- PortAudio.Terminate();
-
-
}
}
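
The removed `PortAudio.Terminate()` was unreachable behind the `while (true)` capture loop, which is presumably why this patch drops it. If the loop ever gains an exit condition, a hedged sketch of pairing it with `PortAudio.Initialize()` via `try`/`finally` could look like this; `RunWithPortAudio` is an illustrative helper, not part of the patch.

```csharp
// Sketch only: balance PortAudio.Initialize()/Terminate() once the capture
// loop can exit; RunWithPortAudio is an illustrative helper name.
using System;
using PortAudioSharp;

static class PortAudioLifetimeSketch
{
    public static void RunWithPortAudio(Action captureLoop)
    {
        PortAudio.Initialize();
        try
        {
            captureLoop(); // e.g. open the stream, Start(), and poll the recognizer
        }
        finally
        {
            PortAudio.Terminate();
        }
    }
}
```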
diff --git a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
index 901c8a1586..72b7b6c914 100644
--- a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
+++ b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
speech_recognition_from_microphone
enable
enable
diff --git a/dotnet-examples/spoken-language-identification/Program.cs b/dotnet-examples/spoken-language-identification/Program.cs
index 05a785d7cf..d2f210e85f 100644
--- a/dotnet-examples/spoken-language-identification/Program.cs
+++ b/dotnet-examples/spoken-language-identification/Program.cs
@@ -15,12 +15,9 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class SpokenLanguageIdentificationDemo
{
-
static void Main(string[] args)
{
var config = new SpokenLanguageIdentificationConfig();
@@ -30,7 +27,7 @@ static void Main(string[] args)
var slid = new SpokenLanguageIdentification(config);
var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
var s = slid.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
diff --git a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
index b8b431a484..e424b2d570 100644
--- a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
+++ b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
spoken_language_identification
enable
enable
diff --git a/dotnet-examples/streaming-hlg-decoding/Program.cs b/dotnet-examples/streaming-hlg-decoding/Program.cs
index 6ac7c8c945..e522b81649 100644
--- a/dotnet-examples/streaming-hlg-decoding/Program.cs
+++ b/dotnet-examples/streaming-hlg-decoding/Program.cs
@@ -13,12 +13,9 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class StreamingHlgDecodingDemo
{
-
static void Main(string[] args)
{
var config = new OnlineRecognizerConfig();
@@ -32,15 +29,15 @@ static void Main(string[] args)
config.ModelConfig.Debug = 0;
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
- WaveReader waveReader = new WaveReader(filename);
- OnlineStream s = recognizer.CreateStream();
+ var waveReader = new WaveReader(filename);
+ var s = recognizer.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
- float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
+ var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished();
@@ -49,7 +46,7 @@ static void Main(string[] args)
recognizer.Decode(s);
}
- OnlineRecognizerResult r = recognizer.GetResult(s);
+ var r = recognizer.GetResult(s);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
@@ -57,10 +54,8 @@ static void Main(string[] args)
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
- r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
+ r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]");
Console.WriteLine("--------------------");
}
}
-
-
diff --git a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
index 66e0401f13..6ed8fc6991 100644
--- a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
+++ b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
streaming_hlg_decoding
enable
enable
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
index abc080b882..e8dfbe6fa1 100644
--- a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
@@ -3,8 +3,6 @@
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class VadNonStreamingAsrParaformer
{
@@ -12,45 +10,49 @@ static void Main(string[] args)
{
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
- OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+ var config = new OfflineRecognizerConfig();
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
config.ModelConfig.Debug = 0;
- OfflineRecognizer recognizer = new OfflineRecognizer(config);
+ var recognizer = new OfflineRecognizer(config);
- VadModelConfig vadModelConfig = new VadModelConfig();
+ var vadModelConfig = new VadModelConfig();
vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
vadModelConfig.Debug = 0;
- VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);
+ var vad = new VoiceActivityDetector(vadModelConfig, 60);
- string testWaveFilename = "./lei-jun-test.wav";
- WaveReader reader = new WaveReader(testWaveFilename);
+ var testWaveFilename = "./lei-jun-test.wav";
+ var reader = new WaveReader(testWaveFilename);
int numSamples = reader.Samples.Length;
int windowSize = vadModelConfig.SileroVad.WindowSize;
int sampleRate = vadModelConfig.SampleRate;
int numIter = numSamples / windowSize;
- for (int i = 0; i != numIter; ++i) {
+ for (int i = 0; i != numIter; ++i)
+ {
int start = i * windowSize;
- float[] samples = new float[windowSize];
+ var samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize);
vad.AcceptWaveform(samples);
- if (vad.IsSpeechDetected()) {
- while (!vad.IsEmpty()) {
+ if (vad.IsSpeechDetected())
+ {
+ while (!vad.IsEmpty())
+ {
SpeechSegment segment = vad.Front();
- float startTime = segment.Start / (float)sampleRate;
- float duration = segment.Samples.Length / (float)sampleRate;
+ var startTime = segment.Start / (float)sampleRate;
+ var duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
- String text = stream.Result.Text;
+ var text = stream.Result.Text;
- if (!String.IsNullOrEmpty(text)) {
- Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
- String.Format("{0:0.00}", startTime+duration), text);
+ if (!string.IsNullOrEmpty(text))
+ {
+ Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
+ string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();
@@ -60,19 +62,21 @@ static void Main(string[] args)
vad.Flush();
- while (!vad.IsEmpty()) {
- SpeechSegment segment = vad.Front();
+ while (!vad.IsEmpty())
+ {
+ var segment = vad.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
- OfflineStream stream = recognizer.CreateStream();
+ var stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
- String text = stream.Result.Text;
+ var text = stream.Result.Text;
- if (!String.IsNullOrEmpty(text)) {
- Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
- String.Format("{0:0.00}", startTime+duration), text);
+ if (!string.IsNullOrEmpty(text))
+ {
+ Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
+ string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();
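
The same segment-draining loop now appears twice in this file, once inside the streaming loop and once after `vad.Flush()`. Below is a sketch of factoring it into one helper, using only calls that are visible in the hunks above; it is not part of the patch and `DrainSegments` is an illustrative name.

```csharp
// Sketch: the drain loop shared by the streaming and post-Flush() call sites
// above; not part of the patch.
using System;
using SherpaOnnx;

static class VadAsrSketch
{
    public static void DrainSegments(VoiceActivityDetector vad,
                                     OfflineRecognizer recognizer,
                                     int sampleRate)
    {
        while (!vad.IsEmpty())
        {
            var segment = vad.Front();
            var startTime = segment.Start / (float)sampleRate;
            var duration = segment.Samples.Length / (float)sampleRate;

            var stream = recognizer.CreateStream();
            stream.AcceptWaveform(sampleRate, segment.Samples);
            recognizer.Decode(stream);

            var text = stream.Result.Text;
            if (!string.IsNullOrEmpty(text))
            {
                Console.WriteLine("{0:0.00}--{1:0.00}: {2}", startTime, startTime + duration, text);
            }

            vad.Pop();
        }
    }
}
```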
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
index a5c5f1022d..1736869a87 100644
--- a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
vad_non_streaming_asr_paraformer
enable
enable
diff --git a/ffmpeg-examples/sherpa-onnx-ffmpeg.c b/ffmpeg-examples/sherpa-onnx-ffmpeg.c
index f99ac0bdc8..82cff11738 100644
--- a/ffmpeg-examples/sherpa-onnx-ffmpeg.c
+++ b/ffmpeg-examples/sherpa-onnx-ffmpeg.c
@@ -214,8 +214,8 @@ static int init_filters(const char *filters_descr) {
}
static void sherpa_decode_frame(const AVFrame *frame,
- SherpaOnnxOnlineRecognizer *recognizer,
- SherpaOnnxOnlineStream *stream,
+ const SherpaOnnxOnlineRecognizer *recognizer,
+ const SherpaOnnxOnlineStream *stream,
const SherpaOnnxDisplay *display,
int32_t *segment_id) {
#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
@@ -290,7 +290,7 @@ int main(int argc, char **argv) {
}
SherpaOnnxOnlineRecognizerConfig config;
- memset(&config, 0, sizeof(config));
+ memset(&config, 0, sizeof(config));
config.model_config.tokens = argv[1];
config.model_config.transducer.encoder = argv[2];
config.model_config.transducer.decoder = argv[3];
@@ -318,9 +318,10 @@ int main(int argc, char **argv) {
config.rule2_min_trailing_silence = 1.2;
config.rule3_min_utterance_length = 300;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&config);
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/flutter-examples/README.md b/flutter-examples/README.md
index b7ed667156..3bb6b52c5a 100644
--- a/flutter-examples/README.md
+++ b/flutter-examples/README.md
@@ -136,14 +136,16 @@ flutter create --platforms ios ./
Connect your iPhone to the computer, and run `flutter devices`, which will print:
```bash
-Found 3 connected devices:
- iPhone (mobile) • 00008030-001064212E85802E • ios • iOS 16.3 20D47
- macOS (desktop) • macos • darwin-x64 • macOS 13.1 22C65 darwin-x64
- Chrome (web) • chrome • web-javascript • Google Chrome 126.0.6478.127
+Found 4 connected devices:
+ iPhone 14 (mobile) • 634110C4-168D-408F-A938-D7FC62222579 • ios • com.apple.CoreSimulator.SimRuntime.iOS-16-2 (simulator)
+ iPhone (mobile) • 00008030-001064212E85802E • ios • iOS 16.3 20D47
+ macOS (desktop) • macos • darwin-x64 • macOS 13.1 22C65 darwin-x64
+ Chrome (web) • chrome • web-javascript • Google Chrome 126.0.6478.127
No wireless devices were found.
Run "flutter emulators" to list and start any available device emulators.
+(E.g., flutter emulators --launch ios)
If you expected another device to be detected, please run "flutter doctor" to diagnose potential issues. You may also try increasing the time to wait
for connected devices with the "--device-timeout" flag. Visit https://flutter.dev/setup/ for troubleshooting tips.
diff --git a/flutter-examples/streaming_asr/pubspec.yaml b/flutter-examples/streaming_asr/pubspec.yaml
index a09ecdeed4..0fc3f36631 100644
--- a/flutter-examples/streaming_asr/pubspec.yaml
+++ b/flutter-examples/streaming_asr/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
publish_to: 'none'
-version: 1.10.27
+version: 1.10.42
topics:
- speech-recognition
@@ -31,7 +31,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
diff --git a/flutter-examples/tts/lib/isolate_tts.dart b/flutter-examples/tts/lib/isolate_tts.dart
new file mode 100644
index 0000000000..950503c3c2
--- /dev/null
+++ b/flutter-examples/tts/lib/isolate_tts.dart
@@ -0,0 +1,246 @@
+import 'dart:io';
+import 'dart:isolate';
+
+import 'package:flutter/material.dart';
+import 'package:flutter/services.dart';
+import 'package:media_kit/media_kit.dart';
+import 'package:path/path.dart' as p;
+import 'package:path_provider/path_provider.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import 'utils.dart';
+
+class _IsolateTask {
+ final SendPort sendPort;
+
+ RootIsolateToken? rootIsolateToken;
+
+ _IsolateTask(this.sendPort, this.rootIsolateToken);
+}
+
+class _PortModel {
+ final String method;
+
+ final SendPort? sendPort;
+ dynamic data;
+
+ _PortModel({
+ required this.method,
+ this.sendPort,
+ this.data,
+ });
+}
+
+class _TtsManager {
+ /// Communication port of the main isolate
+ final ReceivePort receivePort;
+
+ final Isolate isolate;
+
+ final SendPort isolatePort;
+
+ _TtsManager({
+ required this.receivePort,
+ required this.isolate,
+ required this.isolatePort,
+ });
+}
+
+class IsolateTts {
+ static late final _TtsManager _ttsManager;
+
+ /// Get the SendPort of the worker isolate
+ static SendPort get _sendPort => _ttsManager.isolatePort;
+
+ static late sherpa_onnx.OfflineTts _tts;
+
+ static late Player _player;
+
+ static Future<void> init() async {
+ ReceivePort port = ReceivePort();
+ RootIsolateToken? rootIsolateToken = RootIsolateToken.instance;
+
+ Isolate isolate = await Isolate.spawn(
+ _isolateEntry,
+ _IsolateTask(port.sendPort, rootIsolateToken),
+ errorsAreFatal: false,
+ );
+ port.listen((msg) async {
+ if (msg is SendPort) {
+ print('IsolateTts: received SendPort from worker isolate');
+ _ttsManager = _TtsManager(receivePort: port, isolate: isolate, isolatePort: msg);
+ return;
+ }
+ });
+ }
+
+ static Future<void> _isolateEntry(_IsolateTask task) async {
+ if (task.rootIsolateToken != null) {
+ BackgroundIsolateBinaryMessenger.ensureInitialized(task.rootIsolateToken!);
+ }
+ MediaKit.ensureInitialized();
+ _player = Player();
+ sherpa_onnx.initBindings();
+ final receivePort = ReceivePort();
+ task.sendPort.send(receivePort.sendPort);
+
+ String modelDir = '';
+ String modelName = '';
+ String ruleFsts = '';
+ String ruleFars = '';
+ String lexicon = '';
+ String dataDir = '';
+ String dictDir = '';
+
+ // Example 7
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+ // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
+ modelDir = 'vits-melo-tts-zh_en';
+ modelName = 'model.onnx';
+ lexicon = 'lexicon.txt';
+ dictDir = 'vits-melo-tts-zh_en/dict';
+
+ if (modelName == '') {
+ throw Exception('You are supposed to select a model by changing the code before you run the app');
+ }
+
+ final Directory directory = await getApplicationDocumentsDirectory();
+ modelName = p.join(directory.path, modelDir, modelName);
+
+ if (ruleFsts != '') {
+ final all = ruleFsts.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ ruleFsts = tmp.join(',');
+ }
+
+ if (ruleFars != '') {
+ final all = ruleFars.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ ruleFars = tmp.join(',');
+ }
+
+ if (lexicon != '') {
+ lexicon = p.join(directory.path, modelDir, lexicon);
+ }
+
+ if (dataDir != '') {
+ dataDir = p.join(directory.path, dataDir);
+ }
+
+ if (dictDir != '') {
+ dictDir = p.join(directory.path, dictDir);
+ }
+
+ final tokens = p.join(directory.path, modelDir, 'tokens.txt');
+
+ final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+ model: modelName,
+ lexicon: lexicon,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ vits: vits,
+ numThreads: 2,
+ debug: true,
+ provider: 'cpu',
+ );
+
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ maxNumSenetences: 1,
+ );
+ // print(config);
+ receivePort.listen((msg) async {
+ print(msg);
+ if (msg is _PortModel) {
+ switch (msg.method) {
+ case 'generate':
+ {
+ _PortModel _v = msg;
+ final stopwatch = Stopwatch();
+ stopwatch.start();
+ final audio = _tts.generate(text: _v.data['text'], sid: _v.data['sid'], speed: _v.data['speed']);
+ final suffix = '-sid-${_v.data['sid']}-speed-${_v.data['sid'].toStringAsPrecision(2)}';
+ final filename = await generateWaveFilename(suffix);
+
+ final ok = sherpa_onnx.writeWave(
+ filename: filename,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+
+ if (ok) {
+ stopwatch.stop();
+ double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
+
+ double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
+
+ print('Saved to\n$filename\n'
+ 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
+ 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
+ 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
+ '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ');
+
+ await _player.open(Media('file:///$filename'));
+ await _player.play();
+ }
+ }
+ break;
+ }
+ }
+ });
+ _tts = sherpa_onnx.OfflineTts(config);
+ }
+
+ static Future<void> generate({required String text, int sid = 0, double speed = 1.0}) async {
+ ReceivePort receivePort = ReceivePort();
+ _sendPort.send(_PortModel(
+ method: 'generate',
+ data: {'text': text, 'sid': sid, 'speed': speed},
+ sendPort: receivePort.sendPort,
+ ));
+ await receivePort.first;
+ receivePort.close();
+ }
+}
+
+/// The demo page for isolate-based TTS
+class IsolateTtsView extends StatefulWidget {
+ const IsolateTtsView({super.key});
+
+ @override
+ State<IsolateTtsView> createState() => _IsolateTtsViewState();
+}
+
+class _IsolateTtsViewState extends State<IsolateTtsView> {
+ @override
+ void initState() {
+ super.initState();
+ IsolateTts.init();
+ }
+
+ @override
+ Widget build(BuildContext context) {
+ return Scaffold(
+ body: Center(
+ child: ElevatedButton(
+ onPressed: () {
+ IsolateTts.generate(text: '这是已退出的 isolate TTS');
+ },
+ child: Text('Isolate TTS'),
+ ),
+ ),
+ );
+ }
+}
diff --git a/flutter-examples/tts/lib/main.dart b/flutter-examples/tts/lib/main.dart
index 91bc120e87..78042254ab 100644
--- a/flutter-examples/tts/lib/main.dart
+++ b/flutter-examples/tts/lib/main.dart
@@ -1,8 +1,9 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'package:flutter/material.dart';
-import './tts.dart';
import './info.dart';
+import './tts.dart';
+import 'isolate_tts.dart';
void main() {
runApp(const MyApp());
@@ -38,6 +39,7 @@ class _MyHomePageState extends State {
final List<Widget> _tabs = [
TtsScreen(),
InfoScreen(),
+ IsolateTtsView(),
];
@override
Widget build(BuildContext context) {
@@ -62,6 +64,10 @@ class _MyHomePageState extends State {
icon: Icon(Icons.info),
label: 'Info',
),
+ BottomNavigationBarItem(
+ icon: Icon(Icons.multiline_chart),
+ label: 'isolate',
+ ),
],
),
);
diff --git a/flutter-examples/tts/lib/model.dart b/flutter-examples/tts/lib/model.dart
index 16ada98c38..b95ebca53f 100644
--- a/flutter-examples/tts/lib/model.dart
+++ b/flutter-examples/tts/lib/model.dart
@@ -24,13 +24,14 @@ Future createOfflineTts() async {
String modelDir = '';
String modelName = '';
+ String voices = ''; // for Kokoro only
String ruleFsts = '';
String ruleFars = '';
String lexicon = '';
String dataDir = '';
String dictDir = '';
- // You can select an example below and change it according to match your
+ // You can select an example below and change it to match your
// selected tts model
// ============================================================
@@ -84,6 +85,22 @@ Future createOfflineTts() async {
// lexicon = 'lexicon.txt';
// dictDir = 'vits-melo-tts-zh_en/dict';
+ // Example 8
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
+ // modelDir = 'kokoro-en-v0_19';
+ // modelName = 'model.onnx';
+ // voices = 'voices.bin';
+ // dataDir = 'kokoro-en-v0_19/espeak-ng-data';
+
+ // Example 9
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+ // modelDir = 'kokoro-multi-lang-v1_0';
+ // modelName = 'model.onnx';
+ // voices = 'voices.bin';
+ // dataDir = 'kokoro-multi-lang-v1_0/espeak-ng-data';
+ // dictDir = 'kokoro-multi-lang-v1_0/dict';
+ // lexicon = 'kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt';
+
// ============================================================
// Please don't change the remaining part of this function
// ============================================================
@@ -113,7 +130,14 @@ Future createOfflineTts() async {
ruleFars = tmp.join(',');
}
- if (lexicon != '') {
+ if (lexicon.contains(',')) {
+ final all = lexicon.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ lexicon = tmp.join(',');
+ } else if (lexicon != '') {
lexicon = p.join(directory.path, modelDir, lexicon);
}
@@ -126,17 +150,38 @@ Future createOfflineTts() async {
}
final tokens = p.join(directory.path, modelDir, 'tokens.txt');
+ if (voices != '') {
+ voices = p.join(directory.path, modelDir, voices);
+ }
- final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
- model: modelName,
- lexicon: lexicon,
- tokens: tokens,
- dataDir: dataDir,
- dictDir: dictDir,
- );
+ late final sherpa_onnx.OfflineTtsVitsModelConfig vits;
+ late final sherpa_onnx.OfflineTtsKokoroModelConfig kokoro;
+
+ if (voices != '') {
+ vits = sherpa_onnx.OfflineTtsVitsModelConfig();
+ kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: modelName,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ lexicon: lexicon,
+ );
+ } else {
+ vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+ model: modelName,
+ lexicon: lexicon,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ );
+
+ kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig();
+ }
final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
vits: vits,
+ kokoro: kokoro,
numThreads: 2,
debug: true,
provider: 'cpu',
diff --git a/flutter-examples/tts/lib/tts.dart b/flutter-examples/tts/lib/tts.dart
index 342bf070b4..cdf799612e 100644
--- a/flutter-examples/tts/lib/tts.dart
+++ b/flutter-examples/tts/lib/tts.dart
@@ -77,9 +77,7 @@ class _TtsScreenState extends State {
onTapOutside: (PointerDownEvent event) {
FocusManager.instance.primaryFocus?.unfocus();
},
- inputFormatters: [
- FilteringTextInputFormatter.digitsOnly
- ]),
+ inputFormatters: [FilteringTextInputFormatter.digitsOnly]),
Slider(
// decoration: InputDecoration(
// labelText: "speech speed",
@@ -108,125 +106,117 @@ class _TtsScreenState extends State {
},
),
const SizedBox(height: 5),
- Row(
- mainAxisAlignment: MainAxisAlignment.center,
- children: [
- OutlinedButton(
- child: Text("Generate"),
- onPressed: () async {
- await _init();
- await _player?.stop();
-
- setState(() {
- _maxSpeakerID = _tts?.numSpeakers ?? 0;
- if (_maxSpeakerID > 0) {
- _maxSpeakerID -= 1;
- }
- });
-
- if (_tts == null) {
- _controller_hint.value = TextEditingValue(
- text: 'Failed to initialize tts',
- );
- return;
- }
-
- _controller_hint.value = TextEditingValue(
- text: '',
- );
-
- final text = _controller_text_input.text.trim();
- if (text == '') {
- _controller_hint.value = TextEditingValue(
- text: 'Please first input your text to generate',
- );
- return;
- }
-
- final sid =
- int.tryParse(_controller_sid.text.trim()) ?? 0;
-
- final stopwatch = Stopwatch();
- stopwatch.start();
- final audio =
- _tts!.generate(text: text, sid: sid, speed: _speed);
- final suffix =
- '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
- final filename = await generateWaveFilename(suffix);
-
- final ok = sherpa_onnx.writeWave(
- filename: filename,
- samples: audio.samples,
- sampleRate: audio.sampleRate,
- );
-
- if (ok) {
- stopwatch.stop();
- double elapsed =
- stopwatch.elapsed.inMilliseconds.toDouble();
-
- double waveDuration =
- audio.samples.length.toDouble() /
- audio.sampleRate.toDouble();
-
- _controller_hint.value = TextEditingValue(
- text: 'Saved to\n$filename\n'
- 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
- 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
- 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
- '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
- );
- _lastFilename = filename;
-
- await _player?.play(DeviceFileSource(_lastFilename));
- } else {
- _controller_hint.value = TextEditingValue(
- text: 'Failed to save generated audio',
- );
- }
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Clear"),
- onPressed: () {
- _controller_text_input.value = TextEditingValue(
- text: '',
- );
-
- _controller_hint.value = TextEditingValue(
- text: '',
- );
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Play"),
- onPressed: () async {
- if (_lastFilename == '') {
- _controller_hint.value = TextEditingValue(
- text: 'No generated wave file found',
- );
- return;
- }
- await _player?.stop();
- await _player?.play(DeviceFileSource(_lastFilename));
- _controller_hint.value = TextEditingValue(
- text: 'Playing\n$_lastFilename',
- );
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Stop"),
- onPressed: () async {
- await _player?.stop();
- _controller_hint.value = TextEditingValue(
- text: '',
- );
- },
- ),
- ]),
+ Row(mainAxisAlignment: MainAxisAlignment.center, children: [
+ OutlinedButton(
+ child: Text("Generate"),
+ onPressed: () async {
+ await _init();
+ await _player?.stop();
+
+ setState(() {
+ _maxSpeakerID = _tts?.numSpeakers ?? 0;
+ if (_maxSpeakerID > 0) {
+ _maxSpeakerID -= 1;
+ }
+ });
+
+ if (_tts == null) {
+ _controller_hint.value = TextEditingValue(
+ text: 'Failed to initialize tts',
+ );
+ return;
+ }
+
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+
+ final text = _controller_text_input.text.trim();
+ if (text == '') {
+ _controller_hint.value = TextEditingValue(
+ text: 'Please first input your text to generate',
+ );
+ return;
+ }
+
+ final sid = int.tryParse(_controller_sid.text.trim()) ?? 0;
+
+ final stopwatch = Stopwatch();
+ stopwatch.start();
+ final audio = _tts!.generate(text: text, sid: sid, speed: _speed);
+ final suffix = '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
+ final filename = await generateWaveFilename(suffix);
+
+ final ok = sherpa_onnx.writeWave(
+ filename: filename,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+
+ if (ok) {
+ stopwatch.stop();
+ double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
+
+ double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
+
+ _controller_hint.value = TextEditingValue(
+ text: 'Saved to\n$filename\n'
+ 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
+ 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
+ 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
+ '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
+ );
+ _lastFilename = filename;
+
+ await _player?.play(DeviceFileSource(_lastFilename));
+ } else {
+ _controller_hint.value = TextEditingValue(
+ text: 'Failed to save generated audio',
+ );
+ }
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Clear"),
+ onPressed: () {
+ _controller_text_input.value = TextEditingValue(
+ text: '',
+ );
+
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Play"),
+ onPressed: () async {
+ if (_lastFilename == '') {
+ _controller_hint.value = TextEditingValue(
+ text: 'No generated wave file found',
+ );
+ return;
+ }
+ await _player?.stop();
+ await _player?.play(DeviceFileSource(_lastFilename));
+ _controller_hint.value = TextEditingValue(
+ text: 'Playing\n$_lastFilename',
+ );
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Stop"),
+ onPressed: () async {
+ await _player?.stop();
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+ },
+ ),
+ ]),
const SizedBox(height: 5),
TextField(
decoration: InputDecoration(
diff --git a/flutter-examples/tts/pubspec.yaml b/flutter-examples/tts/pubspec.yaml
index 975c0330d3..6e54bd734c 100644
--- a/flutter-examples/tts/pubspec.yaml
+++ b/flutter-examples/tts/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
-version: 1.10.27
+version: 1.10.42
environment:
sdk: ">=2.17.0 <4.0.0"
@@ -18,12 +18,18 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
url_launcher: 6.2.6
url_launcher_linux: 3.1.0
audioplayers: ^5.0.0
+ media_kit:
+ media_kit_libs_video:
flutter:
uses-material-design: true
+
+ assets:
+ - assets/vits-melo-tts-zh_en/
+ - assets/vits-melo-tts-zh_en/dict/
\ No newline at end of file
diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md
index 7e7e8031d0..9255b7ee92 100644
--- a/flutter/sherpa_onnx/example/example.md
+++ b/flutter/sherpa_onnx/example/example.md
@@ -4,13 +4,14 @@
| Functions | URL | Supported Platforms|
|---|---|---|
-|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows|
+|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, iOS, macOS, Windows|
|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows|
## Pure dart-examples
| Functions | URL | Supported Platforms|
|---|---|---|
+|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
diff --git a/flutter/sherpa_onnx/lib/sherpa_onnx.dart b/flutter/sherpa_onnx/lib/sherpa_onnx.dart
index b15e675329..b9fb7dd53e 100644
--- a/flutter/sherpa_onnx/lib/sherpa_onnx.dart
+++ b/flutter/sherpa_onnx/lib/sherpa_onnx.dart
@@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
export 'src/feature_config.dart';
export 'src/keyword_spotter.dart';
export 'src/offline_recognizer.dart';
+export 'src/offline_speaker_diarization.dart';
export 'src/offline_stream.dart';
export 'src/online_recognizer.dart';
export 'src/online_stream.dart';
@@ -24,7 +25,7 @@ String? _path;
// https://github.com/flutter/codelabs/blob/main/ffigen_codelab/step_05/lib/ffigen_app.dart
// https://api.flutter.dev/flutter/dart-io/Platform-class.html
final DynamicLibrary _dylib = () {
- if (Platform.isMacOS || Platform.isIOS) {
+ if (Platform.isMacOS) {
if (_path == null) {
return DynamicLibrary.open('libsherpa-onnx-c-api.dylib');
} else {
@@ -32,6 +33,14 @@ final DynamicLibrary _dylib = () {
}
}
+ if (Platform.isIOS) {
+ if (_path == null) {
+ return DynamicLibrary.open('sherpa_onnx.framework/sherpa_onnx');
+ } else {
+ return DynamicLibrary.open('$_path/sherpa_onnx.framework/sherpa_onnx');
+ }
+ }
+
if (Platform.isAndroid || Platform.isLinux) {
if (_path == null) {
return DynamicLibrary.open('libsherpa-onnx-c-api.so');
diff --git a/flutter/sherpa_onnx/lib/src/audio_tagging.dart b/flutter/sherpa_onnx/lib/src/audio_tagging.dart
index 6c650b30cc..3e3dbed2f4 100644
--- a/flutter/sherpa_onnx/lib/src/audio_tagging.dart
+++ b/flutter/sherpa_onnx/lib/src/audio_tagging.dart
@@ -62,6 +62,8 @@ class AudioEvent {
}
class AudioTagging {
+ AudioTagging.fromPtr({required this.ptr, required this.config});
+
AudioTagging._({required this.ptr, required this.config});
// The user has to invoke AudioTagging.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/lib/src/keyword_spotter.dart b/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
index c098679954..310657d1a0 100644
--- a/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
+++ b/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
@@ -53,6 +53,8 @@ class KeywordResult {
}
class KeywordSpotter {
+ KeywordSpotter.fromPtr({required this.ptr, required this.config});
+
KeywordSpotter._({required this.ptr, required this.config});
/// The user is responsible to call the OnlineRecognizer.free()
@@ -166,6 +168,10 @@ class KeywordSpotter {
SherpaOnnxBindings.decodeKeywordStream?.call(ptr, stream.ptr);
}
+ void reset(OnlineStream stream) {
+ SherpaOnnxBindings.resetKeywordStream?.call(ptr, stream.ptr);
+ }
+
Pointer<SherpaOnnxKeywordSpotter> ptr;
KeywordSpotterConfig config;
}
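
For reference, a minimal Dart sketch (not part of the diff) of how the new KeywordSpotter.reset() is intended to be used, mirroring the Go keyword-spotting example added later in this patch. The spotter/stream setup, the isReady/decode/getResult calls, and the keyword field on KeywordResult are assumed from the existing Dart API:

// Sketch only: `spotter` is an initialized KeywordSpotter and `stream`
// was created with spotter.createStream() and fed via acceptWaveform().
while (spotter.isReady(stream)) {
  spotter.decode(stream);
  final result = spotter.getResult(stream);
  if (result.keyword != '') {
    // Reset the stream right after a keyword is detected.
    spotter.reset(stream);
    print('Detected: ${result.keyword}');
  }
}
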
diff --git a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
index 749ffb316d..01bceccceb 100644
--- a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
+++ b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
@@ -68,6 +68,24 @@ class OfflineWhisperModelConfig {
final int tailPaddings;
}
+class OfflineMoonshineModelConfig {
+ const OfflineMoonshineModelConfig(
+ {this.preprocessor = '',
+ this.encoder = '',
+ this.uncachedDecoder = '',
+ this.cachedDecoder = ''});
+
+ @override
+ String toString() {
+ return 'OfflineMoonshineModelConfig(preprocessor: $preprocessor, encoder: $encoder, uncachedDecoder: $uncachedDecoder, cachedDecoder: $cachedDecoder)';
+ }
+
+ final String preprocessor;
+ final String encoder;
+ final String uncachedDecoder;
+ final String cachedDecoder;
+}
+
class OfflineTdnnModelConfig {
const OfflineTdnnModelConfig({this.model = ''});
@@ -116,6 +134,7 @@ class OfflineModelConfig {
this.whisper = const OfflineWhisperModelConfig(),
this.tdnn = const OfflineTdnnModelConfig(),
this.senseVoice = const OfflineSenseVoiceModelConfig(),
+ this.moonshine = const OfflineMoonshineModelConfig(),
required this.tokens,
this.numThreads = 1,
this.debug = true,
@@ -128,7 +147,7 @@ class OfflineModelConfig {
@override
String toString() {
- return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
+ return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
final OfflineTransducerModelConfig transducer;
@@ -137,6 +156,7 @@ class OfflineModelConfig {
final OfflineWhisperModelConfig whisper;
final OfflineTdnnModelConfig tdnn;
final OfflineSenseVoiceModelConfig senseVoice;
+ final OfflineMoonshineModelConfig moonshine;
final String tokens;
final int numThreads;
@@ -207,6 +227,8 @@ class OfflineRecognizerResult {
}
class OfflineRecognizer {
+ OfflineRecognizer.fromPtr({required this.ptr, required this.config});
+
OfflineRecognizer._({required this.ptr, required this.config});
void free() {
@@ -257,6 +279,15 @@ class OfflineRecognizer {
c.ref.model.senseVoice.useInverseTextNormalization =
config.model.senseVoice.useInverseTextNormalization ? 1 : 0;
+ c.ref.model.moonshine.preprocessor =
+ config.model.moonshine.preprocessor.toNativeUtf8();
+ c.ref.model.moonshine.encoder =
+ config.model.moonshine.encoder.toNativeUtf8();
+ c.ref.model.moonshine.uncachedDecoder =
+ config.model.moonshine.uncachedDecoder.toNativeUtf8();
+ c.ref.model.moonshine.cachedDecoder =
+ config.model.moonshine.cachedDecoder.toNativeUtf8();
+
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
c.ref.model.numThreads = config.model.numThreads;
@@ -294,6 +325,10 @@ class OfflineRecognizer {
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
+ calloc.free(c.ref.model.moonshine.cachedDecoder);
+ calloc.free(c.ref.model.moonshine.uncachedDecoder);
+ calloc.free(c.ref.model.moonshine.encoder);
+ calloc.free(c.ref.model.moonshine.preprocessor);
calloc.free(c.ref.model.senseVoice.language);
calloc.free(c.ref.model.senseVoice.model);
calloc.free(c.ref.model.tdnn.model);
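
For reference, a minimal Dart sketch (not part of the diff) of how the new moonshine field might be plugged into an OfflineRecognizer. The model file names follow the run-moonshine.sh script added later in this patch; the OfflineRecognizerConfig/createStream/acceptWaveform/decode/getResult calls are assumed from the existing offline-recognizer API:

import 'dart:typed_data';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void decodeWithMoonshine(Float32List samples, int sampleRate) {
  final recognizer = sherpa_onnx.OfflineRecognizer(
    sherpa_onnx.OfflineRecognizerConfig(
      model: sherpa_onnx.OfflineModelConfig(
        moonshine: sherpa_onnx.OfflineMoonshineModelConfig(
          preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
          encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
          uncachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
          cachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
        ),
        tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      ),
    ),
  );

  // Non-streaming decoding: feed the whole utterance, then decode once.
  final stream = recognizer.createStream();
  stream.acceptWaveform(samples: samples, sampleRate: sampleRate);
  recognizer.decode(stream);
  print(recognizer.getResult(stream).text);

  stream.free();
  recognizer.free();
}
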
diff --git a/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
new file mode 100644
index 0000000000..fe046a1668
--- /dev/null
+++ b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
@@ -0,0 +1,246 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:ffi';
+import 'dart:typed_data';
+
+import 'package:ffi/ffi.dart';
+
+import './sherpa_onnx_bindings.dart';
+import './speaker_identification.dart';
+
+class OfflineSpeakerDiarizationSegment {
+ const OfflineSpeakerDiarizationSegment({
+ required this.start,
+ required this.end,
+ required this.speaker,
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)';
+ }
+
+ final double start;
+ final double end;
+ final int speaker;
+}
+
+class OfflineSpeakerSegmentationPyannoteModelConfig {
+ const OfflineSpeakerSegmentationPyannoteModelConfig({
+ this.model = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
+ }
+
+ final String model;
+}
+
+class OfflineSpeakerSegmentationModelConfig {
+ const OfflineSpeakerSegmentationModelConfig({
+ this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
+ this.numThreads = 1,
+ this.debug = true,
+ this.provider = 'cpu',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)';
+ }
+
+ final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;
+
+ final int numThreads;
+ final bool debug;
+ final String provider;
+}
+
+class FastClusteringConfig {
+ const FastClusteringConfig({
+ this.numClusters = -1,
+ this.threshold = 0.5,
+ });
+
+ @override
+ String toString() {
+ return 'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)';
+ }
+
+ final int numClusters;
+ final double threshold;
+}
+
+class OfflineSpeakerDiarizationConfig {
+ const OfflineSpeakerDiarizationConfig({
+ this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
+ this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
+ this.clustering = const FastClusteringConfig(),
+ this.minDurationOn = 0.2,
+ this.minDurationOff = 0.5,
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)';
+ }
+
+ final OfflineSpeakerSegmentationModelConfig segmentation;
+ final SpeakerEmbeddingExtractorConfig embedding;
+ final FastClusteringConfig clustering;
+ final double minDurationOff; // in seconds
+ final double minDurationOn; // in seconds
+}
+
+class OfflineSpeakerDiarization {
+ OfflineSpeakerDiarization.fromPtr(
+ {required this.ptr, required this.config, required this.sampleRate});
+
+ OfflineSpeakerDiarization._(
+ {required this.ptr, required this.config, required this.sampleRate});
+
+ void free() {
+ SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
+ ptr = nullptr;
+ }
+
+ /// The user is responsible to call the OfflineSpeakerDiarization.free()
+ /// method of the returned instance to avoid memory leak.
+ factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
+    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();
+
+ c.ref.segmentation.pyannote.model =
+ config.segmentation.pyannote.model.toNativeUtf8();
+ c.ref.segmentation.numThreads = config.segmentation.numThreads;
+ c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
+ c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();
+
+ c.ref.embedding.model = config.embedding.model.toNativeUtf8();
+ c.ref.embedding.numThreads = config.embedding.numThreads;
+ c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
+ c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();
+
+ c.ref.clustering.numClusters = config.clustering.numClusters;
+ c.ref.clustering.threshold = config.clustering.threshold;
+
+ c.ref.minDurationOn = config.minDurationOn;
+ c.ref.minDurationOff = config.minDurationOff;
+
+ final ptr =
+ SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ??
+ nullptr;
+
+ calloc.free(c.ref.embedding.provider);
+ calloc.free(c.ref.embedding.model);
+ calloc.free(c.ref.segmentation.provider);
+ calloc.free(c.ref.segmentation.pyannote.model);
+
+ int sampleRate = 0;
+ if (ptr != nullptr) {
+ sampleRate = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
+ ?.call(ptr) ??
+ 0;
+ }
+ return OfflineSpeakerDiarization._(
+ ptr: ptr, config: config, sampleRate: sampleRate);
+ }
+
+  List<OfflineSpeakerDiarizationSegment> process(
+      {required Float32List samples}) {
+ if (ptr == nullptr) {
+ return [];
+ }
+
+ final n = samples.length;
+    final Pointer<Float> p = calloc<Float>(n);
+
+ final pList = p.asTypedList(n);
+ pList.setAll(0, samples);
+
+ final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
+ ?.call(ptr, p, n) ??
+ nullptr;
+
+ final ans = _processImpl(r);
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
+ ?.call(r);
+
+ return ans;
+ }
+
+  List<OfflineSpeakerDiarizationSegment> processWithCallback({
+ required Float32List samples,
+ required int Function(int numProcessedChunks, int numTotalChunks) callback,
+ }) {
+ if (ptr == nullptr) {
+ return [];
+ }
+
+ final n = samples.length;
+    final Pointer<Float> p = calloc<Float>(n);
+
+ final pList = p.asTypedList(n);
+ pList.setAll(0, samples);
+
+ final wrapper = NativeCallable<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
+ (int numProcessedChunks, int numTotalChunks) {
+ return callback(numProcessedChunks, numTotalChunks);
+ }, exceptionalReturn: 0);
+
+ final r = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
+ ?.call(ptr, p, n, wrapper.nativeFunction) ??
+ nullptr;
+
+ wrapper.close();
+
+ final ans = _processImpl(r);
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
+ ?.call(r);
+
+ return ans;
+ }
+
+  List<OfflineSpeakerDiarizationSegment> _processImpl(
+      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
+ if (r == nullptr) {
+ return [];
+ }
+
+ final numSegments = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
+ ?.call(r) ??
+ 0;
+ final segments = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
+ ?.call(r) ??
+ nullptr;
+
+ if (segments == nullptr) {
+ return [];
+ }
+
+    final ans = <OfflineSpeakerDiarizationSegment>[];
+ for (int i = 0; i != numSegments; ++i) {
+ final s = segments + i;
+
+ final tmp = OfflineSpeakerDiarizationSegment(
+ start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker);
+ ans.add(tmp);
+ }
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
+ ?.call(segments);
+
+ return ans;
+ }
+
+  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;
+ OfflineSpeakerDiarizationConfig config;
+ final int sampleRate;
+}
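
For reference, a hedged Dart usage sketch (not part of the diff) for the new OfflineSpeakerDiarization class. Model and wave file names follow the Go non-streaming-speaker-diarization example added later in this patch; readWave() is assumed from the existing Dart API:

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  final sd = sherpa_onnx.OfflineSpeakerDiarization(
    sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
        pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
          model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx',
        ),
      ),
      embedding: sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
      ),
      // The test wave contains 4 speakers. If the speaker count is unknown,
      // leave numClusters at -1 and tune threshold instead.
      clustering: sherpa_onnx.FastClusteringConfig(numClusters: 4),
    ),
  );

  final wave = sherpa_onnx.readWave('./0-four-speakers-zh.wav');
  // wave.sampleRate should match sd.sampleRate before calling process().
  final segments = sd.process(samples: wave.samples);
  for (final s in segments) {
    print('${s.start.toStringAsFixed(3)} -- ${s.end.toStringAsFixed(3)} '
        'speaker_${s.speaker}');
  }

  sd.free();
}
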
diff --git a/flutter/sherpa_onnx/lib/src/online_recognizer.dart b/flutter/sherpa_onnx/lib/src/online_recognizer.dart
index 18d5a60006..69ed93894d 100644
--- a/flutter/sherpa_onnx/lib/src/online_recognizer.dart
+++ b/flutter/sherpa_onnx/lib/src/online_recognizer.dart
@@ -162,6 +162,8 @@ class OnlineRecognizerResult {
}
class OnlineRecognizer {
+ OnlineRecognizer.fromPtr({required this.ptr, required this.config});
+
OnlineRecognizer._({required this.ptr, required this.config});
/// The user is responsible to call the OnlineRecognizer.free()
diff --git a/flutter/sherpa_onnx/lib/src/punctuation.dart b/flutter/sherpa_onnx/lib/src/punctuation.dart
index b4197fa46b..dd38a2445b 100644
--- a/flutter/sherpa_onnx/lib/src/punctuation.dart
+++ b/flutter/sherpa_onnx/lib/src/punctuation.dart
@@ -36,6 +36,8 @@ class OfflinePunctuationConfig {
}
class OfflinePunctuation {
+ OfflinePunctuation.fromPtr({required this.ptr, required this.config});
+
OfflinePunctuation._({required this.ptr, required this.config});
// The user has to invoke OfflinePunctuation.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
index 42294c2d4a..c22c2a528c 100644
--- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
+++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
@@ -2,6 +2,66 @@
import 'dart:ffi';
import 'package:ffi/ffi.dart';
+final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
+  external Pointer<Utf8> model;
+
+ @Int32()
+ external int numThreads;
+
+ @Int32()
+ external int debug;
+
+  external Pointer<Utf8> provider;
+}
+
+final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
+ @Float()
+ external double start;
+
+ @Float()
+ external double end;
+
+ @Int32()
+ external int speaker;
+}
+
+final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
+ extends Struct {
+  external Pointer<Utf8> model;
+}
+
+final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
+ external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;
+
+ @Int32()
+ external int numThreads;
+
+ @Int32()
+ external int debug;
+
+  external Pointer<Utf8> provider;
+}
+
+final class SherpaOnnxFastClusteringConfig extends Struct {
+ @Int32()
+ external int numClusters;
+
+ @Float()
+ external double threshold;
+}
+
+final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
+ external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
+ external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
+ external SherpaOnnxFastClusteringConfig clustering;
+
+ @Float()
+ external double minDurationOn;
+
+ @Float()
+ external double minDurationOff;
+}
+
final class SherpaOnnxOfflinePunctuationModelConfig extends Struct {
external Pointer<Utf8> ctTransformer;
@@ -71,6 +131,34 @@ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
external Pointer<Utf8> dictDir;
}
+final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
+  external Pointer<Utf8> acousticModel;
+  external Pointer<Utf8> vocoder;
+  external Pointer<Utf8> lexicon;
+  external Pointer<Utf8> tokens;
+  external Pointer<Utf8> dataDir;
+
+ @Float()
+ external double noiseScale;
+
+ @Float()
+ external double lengthScale;
+
+  external Pointer<Utf8> dictDir;
+}
+
+final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
+  external Pointer<Utf8> model;
+  external Pointer<Utf8> voices;
+  external Pointer<Utf8> tokens;
+  external Pointer<Utf8> dataDir;
+
+ @Float()
+ external double lengthScale;
+  external Pointer<Utf8> dictDir;
+  external Pointer<Utf8> lexicon;
+}
+
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
external SherpaOnnxOfflineTtsVitsModelConfig vits;
@Int32()
@@ -80,6 +168,8 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
external int debug;
external Pointer<Utf8> provider;
+ external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
+ external SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
}
final class SherpaOnnxOfflineTtsConfig extends Struct {
@@ -134,6 +224,13 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
external int tailPaddings;
}
+final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
+  external Pointer<Utf8> preprocessor;
+  external Pointer<Utf8> encoder;
+  external Pointer<Utf8> uncachedDecoder;
+  external Pointer<Utf8> cachedDecoder;
+}
+
final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
external Pointer<Utf8> model;
}
@@ -176,6 +273,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external Pointer<Utf8> telespeechCtc;
external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
+ external SherpaOnnxOfflineMoonshineModelConfig moonshine;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@@ -341,18 +439,6 @@ final class SherpaOnnxWave extends Struct {
external int numSamples;
}
-final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
-  external Pointer<Utf8> model;
-
- @Int32()
- external int numThreads;
-
- @Int32()
- external int debug;
-
-  external Pointer<Utf8> provider;
-}
-
final class SherpaOnnxKeywordSpotterConfig extends Struct {
external SherpaOnnxFeatureConfig feat;
@@ -402,10 +488,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
+final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}
+
+final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}
+
+typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
+typedef SherpaOnnxCreateOfflineSpeakerDiarization
+ = SherpaOnnxCreateOfflineSpeakerDiarizationNative;
+
+typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
typedef SherpaOnnxCreateOfflinePunctuationNative
    = Pointer<SherpaOnnxOfflinePunctuation> Function(
        Pointer<SherpaOnnxOfflinePunctuationConfig>);
+typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32
+    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32
+    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
+ = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcess
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32
+ Function(Int32, Int32);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+        Pointer<Float>,
+ Int32,
+ Pointer<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+        Pointer<Float>,
+ int,
+ Pointer<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
typedef SherpaOnnxCreateOfflinePunctuation
= SherpaOnnxCreateOfflinePunctuationNative;
@@ -492,6 +669,12 @@ typedef DecodeKeywordStreamNative = Void Function(
typedef DecodeKeywordStream = void Function(
Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+typedef ResetKeywordStreamNative = Void Function(
+    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+
+typedef ResetKeywordStream = void Function(
+    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+
typedef GetKeywordResultAsJsonNative = Pointer<Utf8> Function(
    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
@@ -940,6 +1123,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
class SherpaOnnxBindings {
+ static SherpaOnnxCreateOfflineSpeakerDiarization?
+ sherpaOnnxCreateOfflineSpeakerDiarization;
+ static SherpaOnnxDestroyOfflineSpeakerDiarization?
+ sherpaOnnxDestroyOfflineSpeakerDiarization;
+ static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate?
+ sherpaOnnxOfflineSpeakerDiarizationGetSampleRate;
+ static SherpaOnnxOfflineSpeakerDiarizationSetConfig?
+ sherpaOnnxOfflineSpeakerDiarizationSetConfig;
+ static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers?
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers;
+ static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments?
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments;
+ static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime?
+ sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime;
+ static SherpaOnnxOfflineSpeakerDiarizationDestroySegment?
+ sherpaOnnxOfflineSpeakerDiarizationDestroySegment;
+ static SherpaOnnxOfflineSpeakerDiarizationProcess?
+ sherpaOnnxOfflineSpeakerDiarizationProcess;
+ static SherpaOnnxOfflineSpeakerDiarizationDestroyResult?
+ sherpaOnnxOfflineSpeakerDiarizationDestroyResult;
+ static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg?
+ sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg;
+
static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation;
static SherpaOnnxDestroyOfflinePunctuation?
sherpaOnnxDestroyOfflinePunctuation;
@@ -959,6 +1165,7 @@ class SherpaOnnxBindings {
static CreateKeywordStreamWithKeywords? createKeywordStreamWithKeywords;
static IsKeywordStreamReady? isKeywordStreamReady;
static DecodeKeywordStream? decodeKeywordStream;
+ static ResetKeywordStream? resetKeywordStream;
static GetKeywordResultAsJson? getKeywordResultAsJson;
static FreeKeywordResultJson? freeKeywordResultJson;
@@ -1107,6 +1314,83 @@ class SherpaOnnxBindings {
static SherpaOnnxFreeWave? freeWave;
static void init(DynamicLibrary dynamicLibrary) {
+ sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxCreateOfflineSpeakerDiarizationNative>>(
+ 'SherpaOnnxCreateOfflineSpeakerDiarization')
+ .asFunction();
+
+ sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>(
+ 'SherpaOnnxDestroyOfflineSpeakerDiarization')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationSetConfig')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationDestroySegment')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProcessNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationProcess')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationDestroyResult')
+ .asFunction();
+
sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>(
'SherpaOnnxCreateOfflinePunctuation')
@@ -1184,6 +1468,11 @@ class SherpaOnnxBindings {
'SherpaOnnxDecodeKeywordStream')
.asFunction();
+ resetKeywordStream ??= dynamicLibrary
+        .lookup<NativeFunction<ResetKeywordStreamNative>>(
+ 'SherpaOnnxResetKeywordStream')
+ .asFunction();
+
getKeywordResultAsJson ??= dynamicLibrary
.lookup<NativeFunction<GetKeywordResultAsJsonNative>>(
'SherpaOnnxGetKeywordResultAsJson')
diff --git a/flutter/sherpa_onnx/lib/src/speaker_identification.dart b/flutter/sherpa_onnx/lib/src/speaker_identification.dart
index 5c2e10744a..8b27dbc69b 100644
--- a/flutter/sherpa_onnx/lib/src/speaker_identification.dart
+++ b/flutter/sherpa_onnx/lib/src/speaker_identification.dart
@@ -25,6 +25,8 @@ class SpeakerEmbeddingExtractorConfig {
}
class SpeakerEmbeddingExtractor {
+ SpeakerEmbeddingExtractor.fromPtr({required this.ptr, required this.dim});
+
SpeakerEmbeddingExtractor._({required this.ptr, required this.dim});
/// The user is responsible to call the SpeakerEmbeddingExtractor.free()
@@ -101,6 +103,8 @@ class SpeakerEmbeddingExtractor {
}
class SpeakerEmbeddingManager {
+ SpeakerEmbeddingManager.fromPtr({required this.ptr, required this.dim});
+
SpeakerEmbeddingManager._({required this.ptr, required this.dim});
// The user has to use SpeakerEmbeddingManager.free() to avoid memory leak
diff --git a/flutter/sherpa_onnx/lib/src/tts.dart b/flutter/sherpa_onnx/lib/src/tts.dart
index f779188b72..e03126d0ec 100644
--- a/flutter/sherpa_onnx/lib/src/tts.dart
+++ b/flutter/sherpa_onnx/lib/src/tts.dart
@@ -8,9 +8,9 @@ import './sherpa_onnx_bindings.dart';
class OfflineTtsVitsModelConfig {
const OfflineTtsVitsModelConfig({
- required this.model,
+ this.model = '',
this.lexicon = '',
- required this.tokens,
+ this.tokens = '',
this.dataDir = '',
this.noiseScale = 0.667,
this.noiseScaleW = 0.8,
@@ -33,9 +33,63 @@ class OfflineTtsVitsModelConfig {
final String dictDir;
}
+class OfflineTtsMatchaModelConfig {
+ const OfflineTtsMatchaModelConfig({
+ this.acousticModel = '',
+ this.vocoder = '',
+ this.lexicon = '',
+ this.tokens = '',
+ this.dataDir = '',
+ this.noiseScale = 0.667,
+ this.lengthScale = 1.0,
+ this.dictDir = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineTtsMatchaModelConfig(acousticModel: $acousticModel, vocoder: $vocoder, lexicon: $lexicon, tokens: $tokens, dataDir: $dataDir, noiseScale: $noiseScale, lengthScale: $lengthScale, dictDir: $dictDir)';
+ }
+
+ final String acousticModel;
+ final String vocoder;
+ final String lexicon;
+ final String tokens;
+ final String dataDir;
+ final double noiseScale;
+ final double lengthScale;
+ final String dictDir;
+}
+
+class OfflineTtsKokoroModelConfig {
+ const OfflineTtsKokoroModelConfig({
+ this.model = '',
+ this.voices = '',
+ this.tokens = '',
+ this.dataDir = '',
+ this.lengthScale = 1.0,
+ this.dictDir = '',
+ this.lexicon = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)';
+ }
+
+ final String model;
+ final String voices;
+ final String tokens;
+ final String dataDir;
+ final double lengthScale;
+ final String dictDir;
+ final String lexicon;
+}
+
class OfflineTtsModelConfig {
const OfflineTtsModelConfig({
- required this.vits,
+ this.vits = const OfflineTtsVitsModelConfig(),
+ this.matcha = const OfflineTtsMatchaModelConfig(),
+ this.kokoro = const OfflineTtsKokoroModelConfig(),
this.numThreads = 1,
this.debug = true,
this.provider = 'cpu',
@@ -43,10 +97,12 @@ class OfflineTtsModelConfig {
@override
String toString() {
- return 'OfflineTtsModelConfig(vits: $vits, numThreads: $numThreads, debug: $debug, provider: $provider)';
+ return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, numThreads: $numThreads, debug: $debug, provider: $provider)';
}
final OfflineTtsVitsModelConfig vits;
+ final OfflineTtsMatchaModelConfig matcha;
+ final OfflineTtsKokoroModelConfig kokoro;
final int numThreads;
final bool debug;
final String provider;
@@ -82,6 +138,8 @@ class GeneratedAudio {
}
class OfflineTts {
+ OfflineTts.fromPtr({required this.ptr, required this.config});
+
OfflineTts._({required this.ptr, required this.config});
/// The user is responsible to call the OfflineTts.free()
@@ -97,6 +155,24 @@ class OfflineTts {
c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();
+ c.ref.model.matcha.acousticModel =
+ config.model.matcha.acousticModel.toNativeUtf8();
+ c.ref.model.matcha.vocoder = config.model.matcha.vocoder.toNativeUtf8();
+ c.ref.model.matcha.lexicon = config.model.matcha.lexicon.toNativeUtf8();
+ c.ref.model.matcha.tokens = config.model.matcha.tokens.toNativeUtf8();
+ c.ref.model.matcha.dataDir = config.model.matcha.dataDir.toNativeUtf8();
+ c.ref.model.matcha.noiseScale = config.model.matcha.noiseScale;
+ c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;
+ c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8();
+
+ c.ref.model.kokoro.model = config.model.kokoro.model.toNativeUtf8();
+ c.ref.model.kokoro.voices = config.model.kokoro.voices.toNativeUtf8();
+ c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
+ c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
+ c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
+ c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8();
+ c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();
+
c.ref.model.numThreads = config.model.numThreads;
c.ref.model.debug = config.model.debug ? 1 : 0;
c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -110,6 +186,21 @@ class OfflineTts {
calloc.free(c.ref.ruleFars);
calloc.free(c.ref.ruleFsts);
calloc.free(c.ref.model.provider);
+
+ calloc.free(c.ref.model.kokoro.lexicon);
+ calloc.free(c.ref.model.kokoro.dictDir);
+ calloc.free(c.ref.model.kokoro.dataDir);
+ calloc.free(c.ref.model.kokoro.tokens);
+ calloc.free(c.ref.model.kokoro.voices);
+ calloc.free(c.ref.model.kokoro.model);
+
+ calloc.free(c.ref.model.matcha.dictDir);
+ calloc.free(c.ref.model.matcha.dataDir);
+ calloc.free(c.ref.model.matcha.tokens);
+ calloc.free(c.ref.model.matcha.lexicon);
+ calloc.free(c.ref.model.matcha.vocoder);
+ calloc.free(c.ref.model.matcha.acousticModel);
+
calloc.free(c.ref.model.vits.dictDir);
calloc.free(c.ref.model.vits.dataDir);
calloc.free(c.ref.model.vits.tokens);
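
For reference, a hedged Dart sketch (not part of the diff) of how the new kokoro model config might be used. The file names follow run-kokoro-en.sh added later in this patch; OfflineTtsConfig, generate(), and writeWave() are assumed from the existing TTS API (writeWave() is already used by the Flutter TTS example near the top of this patch):

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  final tts = sherpa_onnx.OfflineTts(
    sherpa_onnx.OfflineTtsConfig(
      model: sherpa_onnx.OfflineTtsModelConfig(
        kokoro: sherpa_onnx.OfflineTtsKokoroModelConfig(
          model: './kokoro-en-v0_19/model.onnx',
          voices: './kokoro-en-v0_19/voices.bin',
          tokens: './kokoro-en-v0_19/tokens.txt',
          dataDir: './kokoro-en-v0_19/espeak-ng-data',
        ),
      ),
    ),
  );

  // Generate speech for speaker 0 at normal speed and save it to a wave file.
  final audio = tts.generate(text: 'Hello from Kokoro.', sid: 0, speed: 1.0);
  sherpa_onnx.writeWave(
    filename: './test-kokoro-en.wav',
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );

  tts.free();
}
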
diff --git a/flutter/sherpa_onnx/lib/src/vad.dart b/flutter/sherpa_onnx/lib/src/vad.dart
index 10fac5a45a..7db0e55e05 100644
--- a/flutter/sherpa_onnx/lib/src/vad.dart
+++ b/flutter/sherpa_onnx/lib/src/vad.dart
@@ -54,6 +54,8 @@ class SpeechSegment {
}
class CircularBuffer {
+ CircularBuffer.fromPtr({required this.ptr});
+
CircularBuffer._({required this.ptr});
/// The user has to invoke CircularBuffer.free() on the returned instance
@@ -115,6 +117,8 @@ class CircularBuffer {
}
class VoiceActivityDetector {
+ VoiceActivityDetector.fromPtr({required this.ptr, required this.config});
+
VoiceActivityDetector._({required this.ptr, required this.config});
// The user has to invoke VoiceActivityDetector.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/pubspec.yaml b/flutter/sherpa_onnx/pubspec.yaml
index 5b693ef0bf..b0b4c94b51 100644
--- a/flutter/sherpa_onnx/pubspec.yaml
+++ b/flutter/sherpa_onnx/pubspec.yaml
@@ -1,8 +1,8 @@
name: sherpa_onnx
description: >
- Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
- with onnxruntime without Internet connection.
+ Speech recognition, speech synthesis, speaker diarization, and speaker recognition
+ using next-gen Kaldi with onnxruntime without Internet connection.
repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter
@@ -12,12 +12,12 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/
topics:
- speech-recognition
- speech-synthesis
- - speaker-identification
+ - speaker-diarization
- audio-tagging
- voice-activity-detection
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.27
+version: 1.10.42
homepage: https://github.com/k2-fsa/sherpa-onnx
@@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter
- sherpa_onnx_android: ^1.10.27
+ sherpa_onnx_android: ^1.10.42
# sherpa_onnx_android:
# path: ../sherpa_onnx_android
- sherpa_onnx_macos: ^1.10.27
+ sherpa_onnx_macos: ^1.10.42
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos
- sherpa_onnx_linux: ^1.10.27
+ sherpa_onnx_linux: ^1.10.42
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
- #
- sherpa_onnx_windows: ^1.10.27
+
+ sherpa_onnx_windows: ^1.10.42
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
- sherpa_onnx_ios: ^1.10.27
+ sherpa_onnx_ios: ^1.10.42
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios
diff --git a/flutter/sherpa_onnx_ios/README.md b/flutter/sherpa_onnx_ios/README.md
index 1334c25649..974250c30d 100644
--- a/flutter/sherpa_onnx_ios/README.md
+++ b/flutter/sherpa_onnx_ios/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_ios
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
index ab4b74b97c..e4fa2e09ac 100644
--- a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
+++ b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
@@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
- s.version = '1.10.27'
+ s.version = '1.10.42'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
@@ -22,8 +22,9 @@ A new Flutter FFI plugin project.
# `../src/*` so that the C sources can be shared among all target platforms.
s.source = { :path => '.' }
s.dependency 'Flutter'
- s.platform = :ios, '12.0'
- s.ios.vendored_libraries = '*.dylib', '*.a'
+ s.platform = :ios, '13.0'
+ s.preserve_paths = 'sherpa_onnx.xcframework/**/*'
+ s.vendored_frameworks = 'sherpa_onnx.xcframework'
# Flutter.framework does not contain a i386 slice.
s.pod_target_xcconfig = {
diff --git a/flutter/sherpa_onnx_macos/README.md b/flutter/sherpa_onnx_macos/README.md
index 1334c25649..171c76752b 100644
--- a/flutter/sherpa_onnx_macos/README.md
+++ b/flutter/sherpa_onnx_macos/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_macos
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
index 880e0217ea..ae701baedb 100644
--- a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
+++ b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
@@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
- s.version = '1.10.27'
+ s.version = '1.10.42'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
diff --git a/flutter/sherpa_onnx_windows/README.md b/flutter/sherpa_onnx_windows/README.md
index 1334c25649..71c9109827 100644
--- a/flutter/sherpa_onnx_windows/README.md
+++ b/flutter/sherpa_onnx_windows/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_windows
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/go-api-examples/README.md b/go-api-examples/README.md
index 91f2c76e19..e16dab690e 100644
--- a/go-api-examples/README.md
+++ b/go-api-examples/README.md
@@ -6,28 +6,41 @@ Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
for details.
+- [./add-punctuation](./add-punctuation) It shows how to use
+  a punctuation model to add punctuation to text
+
- [./non-streaming-decode-files](./non-streaming-decode-files) It shows how to use
a non-streaming ASR model to decode files
+- [./non-streaming-speaker-diarization](./non-streaming-speaker-diarization) It shows how to use
+ a speaker segmentation model and a speaker embedding model for speaker diarization.
+
- [./non-streaming-tts](./non-streaming-tts) It shows how to use a non-streaming TTS
model to convert text to speech
- [./real-time-speech-recognition-from-microphone](./real-time-speech-recognition-from-microphone)
It shows how to use a streaming ASR model to recognize speech from a microphone in real-time
+- [./speaker-identification](./speaker-identification) It shows how to use a speaker
+ embedding model for speaker identification.
+
+- [./streaming-decode-files](./streaming-decode-files) It shows how to use a streaming
+ model for streaming speech recognition
+
+- [./streaming-hlg-decoding](./streaming-hlg-decoding) It shows how to use a streaming
+ model for streaming speech recognition with HLG decoding
+
- [./vad](./vad) It shows how to use silero VAD with Golang.
-- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
+- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
for speech recognition.
-- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
+- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
for speech recognition.
+
+- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use Go API for VAD + speaker identification.
- [./vad-spoken-language-identification](./vad-spoken-language-identification) It shows how to use silero VAD + Whisper
for spoken language identification.
-- [./speaker-identification](./speaker-identification) It shows how to use Go API for speaker identification.
-
-- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use Go API for VAD + speaker identification.
-
[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
diff --git a/go-api-examples/add-punctuation/go.mod b/go-api-examples/add-punctuation/go.mod
new file mode 100644
index 0000000000..ec6d75805b
--- /dev/null
+++ b/go-api-examples/add-punctuation/go.mod
@@ -0,0 +1,3 @@
+module add-punctuation
+
+go 1.12
diff --git a/go-api-examples/add-punctuation/main.go b/go-api-examples/add-punctuation/main.go
new file mode 100644
index 0000000000..055748ea81
--- /dev/null
+++ b/go-api-examples/add-punctuation/main.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+func main() {
+ log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+
+ config := sherpa.OfflinePunctuationConfig{}
+ config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
+ config.Model.NumThreads = 1
+ config.Model.Provider = "cpu"
+
+ punct := sherpa.NewOfflinePunctuation(&config)
+ defer sherpa.DeleteOfflinePunc(punct)
+
+ textArray := []string{
+ "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
+ "我们都是木头人不会说话不会动",
+ "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
+ }
+ log.Println("----------")
+ for _, text := range textArray {
+ newText := punct.AddPunct(text)
+ log.Printf("Input text: %v", text)
+ log.Printf("Output text: %v", newText)
+ log.Println("----------")
+ }
+}
diff --git a/go-api-examples/add-punctuation/run.sh b/go-api-examples/add-punctuation/run.sh
new file mode 100755
index 0000000000..6d43b84f09
--- /dev/null
+++ b/go-api-examples/add-punctuation/run.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+ tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./add-punctuation
diff --git a/go-api-examples/keyword-spotting-from-file/go.mod b/go-api-examples/keyword-spotting-from-file/go.mod
new file mode 100644
index 0000000000..dbd349a5ea
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/go.mod
@@ -0,0 +1,4 @@
+module keyword-spotting-from-file
+
+go 1.12
+
diff --git a/go-api-examples/keyword-spotting-from-file/main.go b/go-api-examples/keyword-spotting-from-file/main.go
new file mode 100644
index 0000000000..697f9f4d77
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/main.go
@@ -0,0 +1,81 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+func main() {
+ log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+
+ config := sherpa.KeywordSpotterConfig{}
+
+ // Please download the models from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+
+ config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
+ config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
+ config.ModelConfig.NumThreads = 1
+ config.ModelConfig.Debug = 1
+
+ spotter := sherpa.NewKeywordSpotter(&config)
+ defer sherpa.DeleteKeywordSpotter(spotter)
+
+ wave_filename := "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
+
+ wave := sherpa.ReadWave(wave_filename)
+ if wave == nil {
+ log.Printf("Failed to read %v\n", wave_filename)
+ return
+ }
+
+ log.Println("----------Use pre-defined keywords----------")
+
+ stream := sherpa.NewKeywordStream(spotter)
+ defer sherpa.DeleteOnlineStream(stream)
+
+ stream.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream) {
+ spotter.Decode(stream)
+ result := spotter.GetResult(stream)
+ if result.Keyword != "" {
+ // You have to reset the stream right after detecting a keyword
+ spotter.Reset(stream)
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+
+ log.Println("----------Use pre-defined keywords + add a new keyword----------")
+
+ stream2 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员")
+ defer sherpa.DeleteOnlineStream(stream2)
+
+ stream2.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream2) {
+ spotter.Decode(stream2)
+ result := spotter.GetResult(stream2)
+ if result.Keyword != "" {
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+
+ log.Println("----------Use pre-defined keywords + add 2 new keywords----------")
+
+ stream3 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员/zh ī m íng @知名")
+ defer sherpa.DeleteOnlineStream(stream3)
+
+ stream3.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream3) {
+ spotter.Decode(stream3)
+ result := spotter.GetResult(stream3)
+ if result.Keyword != "" {
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+}
diff --git a/go-api-examples/keyword-spotting-from-file/run.sh b/go-api-examples/keyword-spotting-from-file/run.sh
new file mode 100755
index 0000000000..89411f47a4
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/run.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+fi
+
+go mod tidy
+go build
+./keyword-spotting-from-file
diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go
index 5373dcf29d..92b23dc19b 100644
--- a/go-api-examples/non-streaming-decode-files/main.go
+++ b/go-api-examples/non-streaming-decode-files/main.go
@@ -34,6 +34,11 @@ func main() {
flag.StringVar(&config.ModelConfig.Whisper.Task, "whisper-task", "transcribe", "transcribe or translate")
flag.IntVar(&config.ModelConfig.Whisper.TailPaddings, "whisper-tail-paddings", -1, "tail paddings for whisper")
+ flag.StringVar(&config.ModelConfig.Moonshine.Preprocessor, "moonshine-preprocessor", "", "Path to the moonshine preprocessor model")
+ flag.StringVar(&config.ModelConfig.Moonshine.Encoder, "moonshine-encoder", "", "Path to the moonshine encoder model")
+ flag.StringVar(&config.ModelConfig.Moonshine.UncachedDecoder, "moonshine-uncached-decoder", "", "Path to the moonshine uncached decoder model")
+ flag.StringVar(&config.ModelConfig.Moonshine.CachedDecoder, "moonshine-cached-decoder", "", "Path to the moonshine cached decoder model")
+
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
@@ -85,12 +90,8 @@ func main() {
log.Println("Emotion: " + result.Emotion)
log.Println("Lang: " + result.Lang)
log.Println("Event: " + result.Event)
- for _, v := range result.Timestamps {
- log.Printf("Timestamp: %+v\n", v)
- }
- for _, v := range result.Tokens {
- log.Println("Token: " + v)
- }
+ log.Printf("Timestamp: %v\n", result.Timestamps)
+ log.Printf("Tokens: %v\n", result.Tokens)
log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
}
diff --git a/go-api-examples/non-streaming-decode-files/run-moonshine.sh b/go-api-examples/non-streaming-decode-files/run-moonshine.sh
new file mode 100755
index 0000000000..409101e4e3
--- /dev/null
+++ b/go-api-examples/non-streaming-decode-files/run-moonshine.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-decode-files \
+ --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
+
diff --git a/go-api-examples/non-streaming-speaker-diarization/go.mod b/go-api-examples/non-streaming-speaker-diarization/go.mod
new file mode 100644
index 0000000000..39edcecf56
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/go.mod
@@ -0,0 +1,3 @@
+module non-streaming-speaker-diarization
+
+go 1.12
diff --git a/go-api-examples/non-streaming-speaker-diarization/main.go b/go-api-examples/non-streaming-speaker-diarization/main.go
new file mode 100644
index 0000000000..7b975bf614
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/main.go
@@ -0,0 +1,87 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+*/
+
+func initSpeakerDiarization() *sherpa.OfflineSpeakerDiarization {
+ config := sherpa.OfflineSpeakerDiarizationConfig{}
+
+ config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
+ config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
+
+ // The test wave file contains 4 speakers, so we use 4 here
+ config.Clustering.NumClusters = 4
+
+	// if you don't know the actual number of speakers in the wave file,
+ // then please don't set NumClusters; you need to use
+ //
+ // config.Clustering.Threshold = 0.5
+ //
+
+ // A larger Threshold leads to fewer clusters
+ // A smaller Threshold leads to more clusters
+
+ sd := sherpa.NewOfflineSpeakerDiarization(&config)
+ return sd
+}
+
+func main() {
+ wave_filename := "./0-four-speakers-zh.wav"
+ wave := sherpa.ReadWave(wave_filename)
+ if wave == nil {
+ log.Printf("Failed to read %v", wave_filename)
+ return
+ }
+
+ sd := initSpeakerDiarization()
+ if sd == nil {
+ log.Printf("Please check your config")
+ return
+ }
+
+ defer sherpa.DeleteOfflineSpeakerDiarization(sd)
+
+ if wave.SampleRate != sd.SampleRate() {
+ log.Printf("Expected sample rate: %v, given: %d\n", sd.SampleRate(), wave.SampleRate)
+ return
+ }
+
+ log.Println("Started")
+ segments := sd.Process(wave.Samples)
+ n := len(segments)
+
+ for i := 0; i < n; i++ {
+ log.Printf("%.3f -- %.3f speaker_%02d\n", segments[i].Start, segments[i].End, segments[i].Speaker)
+ }
+}
diff --git a/go-api-examples/non-streaming-speaker-diarization/run.sh b/go-api-examples/non-streaming-speaker-diarization/run.sh
new file mode 100755
index 0000000000..1ebfd4aa1c
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/run.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+go mod tidy
+go build
+./non-streaming-speaker-diarization
diff --git a/go-api-examples/non-streaming-tts/main.go b/go-api-examples/non-streaming-tts/main.go
index 0ddeb8fe44..8a5d03a306 100644
--- a/go-api-examples/non-streaming-tts/main.go
+++ b/go-api-examples/non-streaming-tts/main.go
@@ -17,11 +17,30 @@ func main() {
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
+	flag.StringVar(&config.Model.Vits.DictDir, "vits-dict-dir", "", "Path to dict for jieba")
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
+ flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
+ flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
+ flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
+ flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
+ flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
+ flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba")
+
+ flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
+ flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")
+
+ flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model")
+ flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.DictDir, "kokoro-dict-dir", "", "Path to dict for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
+ flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
+
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
diff --git a/go-api-examples/non-streaming-tts/run-kokoro-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-en.sh
new file mode 100755
index 0000000000..a7d356d1c2
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-kokoro-en.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./test-kokoro-en.wav \
+ "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
diff --git a/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
new file mode 100755
index 0000000000..4ed74f90d5
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
+ --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
+ --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
+ --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \
+ --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --debug=1 \
+ --output-filename=./test-kokoro-zh-en.wav \
+ "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
diff --git a/go-api-examples/non-streaming-tts/run-matcha-en.sh b/go-api-examples/non-streaming-tts/run-matcha-en.sh
new file mode 100755
index 0000000000..f0932da56a
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-matcha-en.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./test-matcha-en.wav \
+ "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+
diff --git a/go-api-examples/non-streaming-tts/run-matcha-zh.sh b/go-api-examples/non-streaming-tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..ef4165d042
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-matcha-zh.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --debug=1 \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --output-filename=./test-matcha-zh.wav \
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
+
diff --git a/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh b/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
index 15e4f1dbd8..6f8c98e80a 100755
--- a/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
+++ b/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
@@ -4,7 +4,7 @@ set -ex
if [ ! -d vits-piper-en_US-lessac-medium ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
- tar xvf vits-piper-en_US-lessac-medium.tar.bz2
+ tar xf vits-piper-en_US-lessac-medium.tar.bz2
rm vits-piper-en_US-lessac-medium.tar.bz2
fi
diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/go.mod b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
index 5d6a5b784b..636d6f797d 100644
--- a/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
+++ b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
@@ -1,3 +1,7 @@
module real-time-speech-recognition-from-microphone
go 1.12
+
+require (
+ github.com/csukuangfj/portaudio-go v1.0.3
+)
diff --git a/harmony-os/.gitignore b/harmony-os/.gitignore
new file mode 100644
index 0000000000..dd2f4066e6
--- /dev/null
+++ b/harmony-os/.gitignore
@@ -0,0 +1,2 @@
+!build-profile.json5
+*.har
diff --git a/harmony-os/README.md b/harmony-os/README.md
new file mode 100644
index 0000000000..63a530cc0a
--- /dev/null
+++ b/harmony-os/README.md
@@ -0,0 +1,23 @@
+# Introduction
+
+- [./SherpaOnnxHar](./SherpaOnnxHar) This directory is for building `sherpa_onnx.har`.
+ If you don't need to change the C++ or TypeScript code of sherpa-onnx,
+ you can download the pre-built `sherpa_onnx.har` from us by running `ohpm install sherpa_onnx`.
+ Please refer to our [doc](https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html)
+ if you want to build `sherpa-onnx` from source.
+
+- [./SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It shows how
+ to run on-device speaker diarization.
+
+- [./SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification) It shows how to use
+ speaker embedding models for on-device speaker identification.
+
+- [./SherpaOnnxStreamingAsr](./SherpaOnnxStreamingAsr) It shows how to use
+ streaming ASR models for real-time on-device speech recognition.
+
+- [./SherpaOnnxTts](./SherpaOnnxTts) It shows how to run on-device text-to-speech.
+ Please see the doc at
+
+- [./SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It shows how to use
+ VAD + Non-streaming ASR for speech recognition.
+ Please see the doc at
diff --git a/harmony-os/SherpaOnnxHar/.gitignore b/harmony-os/SherpaOnnxHar/.gitignore
new file mode 100644
index 0000000000..d2ff20141c
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/.gitignore
@@ -0,0 +1,12 @@
+/node_modules
+/oh_modules
+/local.properties
+/.idea
+**/build
+/.hvigor
+.cxx
+/.clangd
+/.clang-format
+/.clang-tidy
+**/.test
+/.appanalyzer
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/AppScope/app.json5 b/harmony-os/SherpaOnnxHar/AppScope/app.json5
new file mode 100644
index 0000000000..8f5c08b90b
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/AppScope/app.json5
@@ -0,0 +1,10 @@
+{
+ "app": {
+ "bundleName": "com.k2fsa.sherpa.onnx",
+ "vendor": "example",
+ "versionCode": 1000000,
+ "versionName": "1.0.0",
+ "icon": "$media:app_icon",
+ "label": "$string:app_name"
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json b/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json
new file mode 100644
index 0000000000..a0fa21ba74
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json
@@ -0,0 +1,8 @@
+{
+ "string": [
+ {
+ "name": "app_name",
+ "value": "SherpaOnnxHar"
+ }
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png b/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png
new file mode 100644
index 0000000000..a39445dc87
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png differ
diff --git a/harmony-os/SherpaOnnxHar/README.md b/harmony-os/SherpaOnnxHar/README.md
new file mode 100644
index 0000000000..a378f73ccd
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/README.md
@@ -0,0 +1,6 @@
+# Introduction
+
+How to build `sherpa_onnx.har` from the command line
+----------------------------------------------------
+
+Please see https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html
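+
+For reference, a minimal sketch of the commands used by `release.sh` in this
+directory (it assumes the HarmonyOS command-line tools, which provide `hvigorw`
+and `ohpm`, are on your `PATH`):
+
+```
+# clean and build the har for the default product/target
+hvigorw clean --no-daemon
+hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --no-daemon
+
+# the resulting package is written to
+#   ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+```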
diff --git a/harmony-os/SherpaOnnxHar/build-profile.json5 b/harmony-os/SherpaOnnxHar/build-profile.json5
new file mode 100644
index 0000000000..2b12adad05
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/build-profile.json5
@@ -0,0 +1,44 @@
+{
+ "app": {
+ "signingConfigs": [],
+ "products": [
+ {
+ "name": "default",
+ "signingConfig": "default",
+ "compatibleSdkVersion": "4.0.0(10)",
+ "runtimeOS": "HarmonyOS",
+ "buildOption": {
+ "strictMode": {
+ "caseSensitiveCheck": true,
+ }
+ }
+ }
+ ],
+ "buildModeSet": [
+ {
+ "name": "debug",
+ },
+ {
+ "name": "release"
+ }
+ ]
+ },
+ "modules": [
+ {
+ "name": "entry",
+ "srcPath": "./entry",
+ "targets": [
+ {
+ "name": "default",
+ "applyToProducts": [
+ "default"
+ ]
+ }
+ ]
+ },
+ {
+ "name": "sherpa_onnx",
+ "srcPath": "./sherpa_onnx",
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/code-linter.json5 b/harmony-os/SherpaOnnxHar/code-linter.json5
new file mode 100644
index 0000000000..77b31b517a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/code-linter.json5
@@ -0,0 +1,20 @@
+{
+ "files": [
+ "**/*.ets"
+ ],
+ "ignore": [
+ "**/src/ohosTest/**/*",
+ "**/src/test/**/*",
+ "**/src/mock/**/*",
+ "**/node_modules/**/*",
+ "**/oh_modules/**/*",
+ "**/build/**/*",
+ "**/.preview/**/*"
+ ],
+ "ruleSet": [
+ "plugin:@performance/recommended",
+ "plugin:@typescript-eslint/recommended"
+ ],
+ "rules": {
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/.gitignore b/harmony-os/SherpaOnnxHar/entry/.gitignore
new file mode 100644
index 0000000000..e2713a2779
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/.gitignore
@@ -0,0 +1,6 @@
+/node_modules
+/oh_modules
+/.preview
+/build
+/.cxx
+/.test
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/build-profile.json5 b/harmony-os/SherpaOnnxHar/entry/build-profile.json5
new file mode 100644
index 0000000000..4d611879c7
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/build-profile.json5
@@ -0,0 +1,28 @@
+{
+ "apiType": "stageMode",
+ "buildOption": {
+ },
+ "buildOptionSet": [
+ {
+ "name": "release",
+ "arkOptions": {
+ "obfuscation": {
+ "ruleOptions": {
+ "enable": false,
+ "files": [
+ "./obfuscation-rules.txt"
+ ]
+ }
+ }
+ }
+ },
+ ],
+ "targets": [
+ {
+ "name": "default"
+ },
+ {
+ "name": "ohosTest",
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts b/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts
new file mode 100644
index 0000000000..c6edcd9048
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { hapTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt b/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt
new file mode 100644
index 0000000000..272efb6ca3
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt
@@ -0,0 +1,23 @@
+# Define project specific obfuscation rules here.
+# You can include the obfuscation configuration files in the current module's build-profile.json5.
+#
+# For more details, see
+# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
+
+# Obfuscation options:
+# -disable-obfuscation: disable all obfuscations
+# -enable-property-obfuscation: obfuscate the property names
+# -enable-toplevel-obfuscation: obfuscate the names in the global scope
+# -compact: remove unnecessary blank spaces and all line feeds
+# -remove-log: remove all console.* statements
+# -print-namecache: print the name cache that contains the mapping from the old names to new names
+# -apply-namecache: reuse the given cache file
+
+# Keep options:
+# -keep-property-name: specifies property names that you want to keep
+# -keep-global-name: specifies names that you want to keep in the global scope
+
+-enable-property-obfuscation
+-enable-toplevel-obfuscation
+-enable-filename-obfuscation
+-enable-export-obfuscation
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/oh-package.json5 b/harmony-os/SherpaOnnxHar/entry/oh-package.json5
new file mode 100644
index 0000000000..248c3b7541
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/oh-package.json5
@@ -0,0 +1,10 @@
+{
+ "name": "entry",
+ "version": "1.0.0",
+ "description": "Please describe the basic information.",
+ "main": "",
+ "author": "",
+ "license": "",
+ "dependencies": {}
+}
+
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets
new file mode 100644
index 0000000000..679d914538
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets
@@ -0,0 +1,43 @@
+import AbilityConstant from '@ohos.app.ability.AbilityConstant';
+import hilog from '@ohos.hilog';
+import UIAbility from '@ohos.app.ability.UIAbility';
+import Want from '@ohos.app.ability.Want';
+import window from '@ohos.window';
+
+export default class EntryAbility extends UIAbility {
+ onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
+ }
+
+ onDestroy(): void {
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
+ }
+
+ onWindowStageCreate(windowStage: window.WindowStage): void {
+ // Main window is created, set main page for this ability
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');
+
+ windowStage.loadContent('pages/Index', (err) => {
+ if (err.code) {
+ hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
+ return;
+ }
+ hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
+ });
+ }
+
+ onWindowStageDestroy(): void {
+ // Main window is destroyed, release UI related resources
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
+ }
+
+ onForeground(): void {
+ // Ability has brought to foreground
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
+ }
+
+ onBackground(): void {
+ // Ability has back to background
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
new file mode 100644
index 0000000000..d2c48b4212
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
@@ -0,0 +1,12 @@
+import hilog from '@ohos.hilog';
+import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
+
+export default class EntryBackupAbility extends BackupExtensionAbility {
+ async onBackup() {
+ hilog.info(0x0000, 'testTag', 'onBackup ok');
+ }
+
+ async onRestore(bundleVersion: BundleVersion) {
+ hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets
new file mode 100644
index 0000000000..423b4276ec
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets
@@ -0,0 +1,17 @@
+@Entry
+@Component
+struct Index {
+ @State message: string = 'Hello World';
+
+ build() {
+ Row() {
+ Column() {
+ Text(this.message)
+ .fontSize(50)
+ .fontWeight(FontWeight.Bold)
+ }
+ .width('100%')
+ }
+ .height('100%')
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/module.json5 b/harmony-os/SherpaOnnxHar/entry/src/main/module.json5
new file mode 100644
index 0000000000..a1cea8b6a4
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/module.json5
@@ -0,0 +1,52 @@
+{
+ "module": {
+ "name": "entry",
+ "type": "entry",
+ "description": "$string:module_desc",
+ "mainElement": "EntryAbility",
+ "deviceTypes": [
+ "phone",
+ "tablet",
+ "2in1"
+ ],
+ "deliveryWithInstall": true,
+ "installationFree": false,
+ "pages": "$profile:main_pages",
+ "abilities": [
+ {
+ "name": "EntryAbility",
+ "srcEntry": "./ets/entryability/EntryAbility.ets",
+ "description": "$string:EntryAbility_desc",
+ "icon": "$media:layered_image",
+ "label": "$string:EntryAbility_label",
+ "startWindowIcon": "$media:startIcon",
+ "startWindowBackground": "$color:start_window_background",
+ "exported": true,
+ "skills": [
+ {
+ "entities": [
+ "entity.system.home"
+ ],
+ "actions": [
+ "action.system.home"
+ ]
+ }
+ ]
+ }
+ ],
+ "extensionAbilities": [
+ {
+ "name": "EntryBackupAbility",
+ "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
+ "type": "backup",
+ "exported": false,
+ "metadata": [
+ {
+ "name": "ohos.extension.backup",
+ "resource": "$profile:backup_config"
+ }
+ ],
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json
new file mode 100644
index 0000000000..3c712962da
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json
@@ -0,0 +1,8 @@
+{
+ "color": [
+ {
+ "name": "start_window_background",
+ "value": "#FFFFFF"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json
new file mode 100644
index 0000000000..f94595515a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "module description"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png
new file mode 100644
index 0000000000..f939c9fa8c
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png
new file mode 100644
index 0000000000..4483ddad1f
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json
new file mode 100644
index 0000000000..fb49920440
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json
@@ -0,0 +1,7 @@
+{
+ "layered-image":
+ {
+ "background" : "$media:background",
+ "foreground" : "$media:foreground"
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png
new file mode 100644
index 0000000000..205ad8b5a8
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json
new file mode 100644
index 0000000000..78f40ae7c4
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json
@@ -0,0 +1,3 @@
+{
+ "allowToBackupRestore": true
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json
new file mode 100644
index 0000000000..1898d94f58
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json
@@ -0,0 +1,5 @@
+{
+ "src": [
+ "pages/Index"
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json
new file mode 100644
index 0000000000..f94595515a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "module description"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json
new file mode 100644
index 0000000000..597ecf95e6
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "模块描述"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets
new file mode 100644
index 0000000000..8aa3749775
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets
@@ -0,0 +1,35 @@
+import hilog from '@ohos.hilog';
+import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
+
+export default function abilityTest() {
+ describe('ActsAbilityTest', () => {
+ // Defines a test suite. Two parameters are supported: test suite name and test suite function.
+ beforeAll(() => {
+ // Presets an action, which is performed only once before all test cases of the test suite start.
+ // This API supports only one parameter: preset action function.
+ })
+ beforeEach(() => {
+ // Presets an action, which is performed before each unit test case starts.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: preset action function.
+ })
+ afterEach(() => {
+ // Presets a clear action, which is performed after each unit test case ends.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: clear action function.
+ })
+ afterAll(() => {
+ // Presets a clear action, which is performed after all test cases of the test suite end.
+ // This API supports only one parameter: clear action function.
+ })
+ it('assertContain', 0, () => {
+ // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
+ hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
+ let a = 'abc';
+ let b = 'b';
+ // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
+ expect(a).assertContain(b);
+ expect(a).assertEqual(a);
+ })
+ })
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets
new file mode 100644
index 0000000000..794c7dc4ed
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets
@@ -0,0 +1,5 @@
+import abilityTest from './Ability.test';
+
+export default function testsuite() {
+ abilityTest();
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5 b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5
new file mode 100644
index 0000000000..55725a9299
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5
@@ -0,0 +1,13 @@
+{
+ "module": {
+ "name": "entry_test",
+ "type": "feature",
+ "deviceTypes": [
+ "phone",
+ "tablet",
+ "2in1"
+ ],
+ "deliveryWithInstall": true,
+ "installationFree": false
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets b/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets
new file mode 100644
index 0000000000..bb5b5c3731
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets
@@ -0,0 +1,5 @@
+import localUnitTest from './LocalUnit.test';
+
+export default function testsuite() {
+ localUnitTest();
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets b/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets
new file mode 100644
index 0000000000..165fc1615e
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets
@@ -0,0 +1,33 @@
+import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
+
+export default function localUnitTest() {
+ describe('localUnitTest', () => {
+ // Defines a test suite. Two parameters are supported: test suite name and test suite function.
+ beforeAll(() => {
+ // Presets an action, which is performed only once before all test cases of the test suite start.
+ // This API supports only one parameter: preset action function.
+ });
+ beforeEach(() => {
+ // Presets an action, which is performed before each unit test case starts.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: preset action function.
+ });
+ afterEach(() => {
+ // Presets a clear action, which is performed after each unit test case ends.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: clear action function.
+ });
+ afterAll(() => {
+ // Presets a clear action, which is performed after all test cases of the test suite end.
+ // This API supports only one parameter: clear action function.
+ });
+ it('assertContain', 0, () => {
+ // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
+ let a = 'abc';
+ let b = 'b';
+ // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
+ expect(a).assertContain(b);
+ expect(a).assertEqual(a);
+ });
+ });
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5 b/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5
new file mode 100644
index 0000000000..06b2783670
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5
@@ -0,0 +1,22 @@
+{
+ "modelVersion": "5.0.0",
+ "dependencies": {
+ },
+ "execution": {
+ // "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
+ // "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
+ // "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
+ // "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
+ // "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
+ },
+ "logging": {
+ // "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
+ },
+ "debugging": {
+ // "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
+ },
+ "nodeOptions": {
+ // "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
+ // "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/hvigorfile.ts b/harmony-os/SherpaOnnxHar/hvigorfile.ts
new file mode 100644
index 0000000000..f3cb9f1a87
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { appTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/notes.md b/harmony-os/SherpaOnnxHar/notes.md
new file mode 100644
index 0000000000..6926a7bb6d
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/notes.md
@@ -0,0 +1,13 @@
+# Notes
+
+## How to publish a package
+
+Please see
+ -
+ -
+ -
+
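+For the publish step itself, `release.sh` in this directory boils down to the
+following (a minimal sketch; it assumes the har has already been built with
+`hvigorw ... assembleHar` and that you are logged in to the ohpm registry):
+
+```
+ohpm publish ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+```
+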
+## How to sign the HAP file from commandline
+
+Please see
+
diff --git a/harmony-os/SherpaOnnxHar/oh-package-lock.json5 b/harmony-os/SherpaOnnxHar/oh-package-lock.json5
new file mode 100644
index 0000000000..f538ae290f
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/oh-package-lock.json5
@@ -0,0 +1,19 @@
+{
+ "meta": {
+ "stableOrder": true
+ },
+ "lockfileVersion": 3,
+ "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
+ "specifiers": {
+ "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
+ },
+ "packages": {
+ "@ohos/hypium@1.0.19": {
+ "name": "@ohos/hypium",
+ "version": "1.0.19",
+ "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
+ "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
+ "registryType": "ohpm"
+ }
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/oh-package.json5 b/harmony-os/SherpaOnnxHar/oh-package.json5
new file mode 100644
index 0000000000..a79d5300e5
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/oh-package.json5
@@ -0,0 +1,9 @@
+{
+ "modelVersion": "5.0.0",
+ "description": "Please describe the basic information.",
+ "dependencies": {
+ },
+ "devDependencies": {
+ "@ohos/hypium": "1.0.19"
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/release.sh b/harmony-os/SherpaOnnxHar/release.sh
new file mode 100755
index 0000000000..cc33364fb1
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/release.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -ex
+
+export PATH=/Users/fangjun/software/command-line-tools/bin:$PATH
+
+hvigorw clean --no-daemon
+hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon
+
+ohpm publish ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore b/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore
new file mode 100644
index 0000000000..e2713a2779
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore
@@ -0,0 +1,6 @@
+/node_modules
+/oh_modules
+/.preview
+/build
+/.cxx
+/.test
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
new file mode 100644
index 0000000000..ea97166bc9
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
@@ -0,0 +1,17 @@
+/**
+ * Use these variables when you tailor your ArkTS code. They must be of the const type.
+ */
+export const HAR_VERSION = '1.10.41';
+export const BUILD_MODE_NAME = 'debug';
+export const DEBUG = true;
+export const TARGET_NAME = 'default';
+
+/**
+ * BuildProfile Class is used only for compatibility purposes.
+ */
+export default class BuildProfile {
+ static readonly HAR_VERSION = HAR_VERSION;
+ static readonly BUILD_MODE_NAME = BUILD_MODE_NAME;
+ static readonly DEBUG = DEBUG;
+ static readonly TARGET_NAME = TARGET_NAME;
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
new file mode 100644
index 0000000000..84286294a6
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
@@ -0,0 +1,60 @@
+export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
+
+export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';
+
+
+export { Samples,
+ OfflineStream,
+ FeatureConfig,
+ OfflineTransducerModelConfig,
+ OfflineParaformerModelConfig,
+ OfflineNemoEncDecCtcModelConfig,
+ OfflineWhisperModelConfig,
+ OfflineTdnnModelConfig,
+ OfflineSenseVoiceModelConfig,
+ OfflineMoonshineModelConfig,
+ OfflineModelConfig,
+ OfflineLMConfig,
+ OfflineRecognizerConfig,
+ OfflineRecognizerResult,
+ OfflineRecognizer,
+} from './src/main/ets/components/NonStreamingAsr';
+
+export { OnlineStream,
+ OnlineTransducerModelConfig,
+ OnlineParaformerModelConfig,
+ OnlineZipformer2CtcModelConfig,
+ OnlineModelConfig,
+ OnlineCtcFstDecoderConfig,
+ OnlineRecognizerConfig,
+ OnlineRecognizerResult,
+ OnlineRecognizer,
+} from './src/main/ets/components/StreamingAsr';
+
+export { OfflineTtsKokoroModelConfig,
+ OfflineTtsMatchaModelConfig,
+ OfflineTtsVitsModelConfig,
+ OfflineTtsModelConfig,
+ OfflineTtsConfig,
+ OfflineTts,
+ TtsOutput,
+ TtsInput,
+} from './src/main/ets/components/NonStreamingTts';
+
+export { SpeakerEmbeddingExtractorConfig,
+ SpeakerEmbeddingExtractor,
+ SpeakerEmbeddingManager,
+} from './src/main/ets/components/SpeakerIdentification';
+
+export { OfflineSpeakerSegmentationPyannoteModelConfig,
+ OfflineSpeakerSegmentationModelConfig,
+ OfflineSpeakerDiarizationConfig,
+ OfflineSpeakerDiarizationSegment,
+ OfflineSpeakerDiarization,
+ FastClusteringConfig,
+} from './src/main/ets/components/NonStreamingSpeakerDiarization';
+
+export { KeywordSpotterConfig,
+ KeywordSpotterResult,
+ KeywordSpotter,
+} from './src/main/ets/components/KeywordSpotting';
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md b/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md
new file mode 100644
index 0000000000..95fc7bdd75
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md
@@ -0,0 +1,46 @@
+# Introduction
+
+[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is one of the deployment
+frameworks of [Next-gen Kaldi](https://github.com/k2-fsa).
+
+It supports speech-to-text, text-to-speech, speaker diarization, and VAD using
+onnxruntime without Internet connection.
+
+It also supports embedded systems, Android, iOS, HarmonyOS,
+Raspberry Pi, RISC-V, x86_64 servers, websocket server/client,
+C/C++, Python, Kotlin, C#, Go, NodeJS, Java, Swift, Dart, JavaScript,
+Flutter, Object Pascal, Lazarus, Rust, etc.
+
+
+# Installation
+
+To use `sherpa-onnx` in your project, please either use
+
+```
+ohpm install sherpa_onnx
+```
+or update your `oh-package.json5` to include the following:
+
+```
+ "dependencies": {
+ "sherpa_onnx": "1.10.42",
+ },
+```
+
+Note that we recommend always using the latest version.
+
+# Examples
+
+| Demo | URL | Description|
+|------|-----|------------|
+|SherpaOnnxStreamingAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxStreamingAsr)|On-device real-time/streaming speech recognition with Next-gen Kaldi|
+|SherpaOnnxVadAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxVadAsr)|It shows how to use VAD with a non-streaming ASR model for on-device speech recognition without accessing the network |
+|SherpaOnnxTts|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxTts)|It shows how to use Next-gen Kaldi for on-device text-to-speech (TTS, i.e., speech synthesis)|
+|SherpaOnnxSpeakerDiarization|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerDiarization)|On-device speaker diarization with Next-gen Kaldi|
+|SherpaOnnxSpeakerIdentification|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerIdentification)|On-device speaker identification with Next-gen Kaldi|
+
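+Once the dependency is added, the APIs exported in `Index.ets` of this package
+can be imported by the package name. A minimal sketch of the import (the class
+names are taken from `Index.ets`; see the demo projects above for how to fill
+in the configs):
+
+```
+// in an .ets file of your project, after adding the dependency
+import { OnlineRecognizer, OnlineRecognizerConfig } from 'sherpa_onnx';
+import { OfflineTts, OfflineTtsConfig } from 'sherpa_onnx';
+import { Vad, VadConfig, CircularBuffer } from 'sherpa_onnx';
+```
+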
+# Documentation
+
+If you have any issues, please either look at our doc at
+https://k2-fsa.github.io/sherpa/onnx/ or create an issue at https://github.com/k2-fsa/sherpa-onnx/issues
+
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5
new file mode 100644
index 0000000000..905c571273
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5
@@ -0,0 +1,46 @@
+{
+ "apiType": "stageMode",
+ "buildOption": {
+ "externalNativeOptions": {
+ "path": "./src/main/cpp/CMakeLists.txt",
+ "arguments": "",
+ "cppFlags": "-std=c++17",
+ "abiFilters": [
+ "arm64-v8a",
+ "x86_64",
+ ],
+ },
+ },
+ "buildOptionSet": [
+ {
+ "name": "release",
+ "arkOptions": {
+ "obfuscation": {
+ "ruleOptions": {
+ "enable": false,
+ "files": [
+ "./obfuscation-rules.txt"
+ ]
+ },
+ "consumerFiles": [
+ "./consumer-rules.txt"
+ ]
+ }
+ },
+ "nativeLib": {
+ "debugSymbol": {
+ "strip": true,
+ "exclude": []
+ }
+ }
+ },
+ ],
+ "targets": [
+ {
+ "name": "default"
+ },
+ {
+ "name": "ohosTest"
+ }
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/consumer-rules.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/consumer-rules.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts
new file mode 100644
index 0000000000..4218707148
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { harTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: harTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt
new file mode 100644
index 0000000000..272efb6ca3
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt
@@ -0,0 +1,23 @@
+# Define project specific obfuscation rules here.
+# You can include the obfuscation configuration files in the current module's build-profile.json5.
+#
+# For more details, see
+# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
+
+# Obfuscation options:
+# -disable-obfuscation: disable all obfuscations
+# -enable-property-obfuscation: obfuscate the property names
+# -enable-toplevel-obfuscation: obfuscate the names in the global scope
+# -compact: remove unnecessary blank spaces and all line feeds
+# -remove-log: remove all console.* statements
+# -print-namecache: print the name cache that contains the mapping from the old names to new names
+# -apply-namecache: reuse the given cache file
+
+# Keep options:
+# -keep-property-name: specifies property names that you want to keep
+# -keep-global-name: specifies names that you want to keep in the global scope
+
+-enable-property-obfuscation
+-enable-toplevel-obfuscation
+-enable-filename-obfuscation
+-enable-export-obfuscation
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5
new file mode 100644
index 0000000000..2585b2e83d
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5
@@ -0,0 +1,18 @@
+{
+ "meta": {
+ "stableOrder": true
+ },
+ "lockfileVersion": 3,
+ "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
+ "specifiers": {
+ "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx"
+ },
+ "packages": {
+ "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": {
+ "name": "libsherpa_onnx.so",
+ "version": "1.0.0",
+ "resolved": "src/main/cpp/types/libsherpa_onnx",
+ "registryType": "local"
+ }
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5
new file mode 100644
index 0000000000..52b9733345
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5
@@ -0,0 +1,28 @@
+{
+ "name": "sherpa_onnx",
+ "version": "1.10.42",
+ "description": "On-device speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without Internet connection",
+ "main": "Index.ets",
+ "author": "The next-gen Kaldi team",
+ "license": "Apache-2.0",
+ "homepage": "https://github.com/k2-fsa/sherpa-onnx",
+ "repository": "https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxHar",
+ "dependencies": {
+ "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
+ },
+ "keywords": [
+ "语音识别",
+ "语音合成",
+ "说话人日志",
+ "新一代Kaldi",
+ "不联网",
+ "本地",
+ "tts",
+ "asr",
+ "privacy",
+ "open-source",
+ ],
+ "bugs": {
+ "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
+ },
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt
new file mode 100644
index 0000000000..26dda1789e
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt
@@ -0,0 +1,74 @@
+# the minimum version of CMake.
+cmake_minimum_required(VERSION 3.13.0)
+project(myNpmLib)
+
+if (NOT CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
+endif()
+
+# Disable warning about
+#
+# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
+# not set.
+if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
+ cmake_policy(SET CMP0135 NEW)
+endif()
+
+set(NATIVERENDER_ROOT_PATH ${CMAKE_CURRENT_SOURCE_DIR})
+
+if(DEFINED PACKAGE_FIND_FILE)
+ include(${PACKAGE_FIND_FILE})
+endif()
+
+include_directories(${NATIVERENDER_ROOT_PATH}
+ ${NATIVERENDER_ROOT_PATH}/include)
+
+include(FetchContent)
+FetchContent_Declare(node_addon_api
+ GIT_REPOSITORY "https://github.com/nodejs/node-addon-api.git"
+ GIT_TAG c679f6f4c9dc6bf9fc0d99cbe5982bd24a5e2c7b
+ PATCH_COMMAND git checkout . && git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/my-patch.diff"
+)
+FetchContent_MakeAvailable(node_addon_api)
+FetchContent_GetProperties(node_addon_api)
+if(NOT node_addon_api_POPULATED)
+ message(STATUS "Downloading node-addon-api from")
+ FetchContent_Populate(node_addon_api)
+endif()
+
+message(STATUS "node-addon-api is downloaded to ${node_addon_api_SOURCE_DIR}")
+include_directories(${node_addon_api_SOURCE_DIR})
+
+add_library(sherpa_onnx SHARED
+ audio-tagging.cc
+ keyword-spotting.cc
+ non-streaming-asr.cc
+ non-streaming-speaker-diarization.cc
+ non-streaming-tts.cc
+ punctuation.cc
+ sherpa-onnx-node-addon-api.cc
+ speaker-identification.cc
+ spoken-language-identification.cc
+ streaming-asr.cc
+ utils.cc
+ vad.cc
+ wave-reader.cc
+ wave-writer.cc
+)
+
+add_library(sherpa_onnx_c_api SHARED IMPORTED)
+set_target_properties(sherpa_onnx_c_api
+ PROPERTIES
+ IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libsherpa-onnx-c-api.so)
+
+add_library(onnxruntime SHARED IMPORTED)
+set_target_properties(onnxruntime
+ PROPERTIES
+ IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libonnxruntime.so)
+
+
+target_link_libraries(sherpa_onnx PUBLIC libace_napi.z.so
+ libhilog_ndk.z.so # for hilog
+ librawfile.z.so
+ sherpa_onnx_c_api onnxruntime
+)
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc
new file mode 100644
index 0000000000..f4d6ac5391
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc
@@ -0,0 +1,216 @@
+// scripts/node-addon-api/src/audio-tagging.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+#include <sstream>
+
+#include "macros.h" // NOLINT
+#include "napi.h" // NOLINT
+#include "sherpa-onnx/c-api/c-api.h"
+
+static SherpaOnnxOfflineZipformerAudioTaggingModelConfig
+GetAudioTaggingZipformerModelConfig(Napi::Object obj) {
+ SherpaOnnxOfflineZipformerAudioTaggingModelConfig c;
+ memset(&c, 0, sizeof(c));
+
+ if (!obj.Has("zipformer") || !obj.Get("zipformer").IsObject()) {
+ return c;
+ }
+
+ Napi::Object o = obj.Get("zipformer").As();
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
+
+ return c;
+}
+
+static SherpaOnnxAudioTaggingModelConfig GetAudioTaggingModelConfig(
+ Napi::Object obj) {
+ SherpaOnnxAudioTaggingModelConfig c;
+ memset(&c, 0, sizeof(c));
+
+ if (!obj.Has("model") || !obj.Get("model").IsObject()) {
+ return c;
+ }
+
+ Napi::Object o = obj.Get("model").As();
+ c.zipformer = GetAudioTaggingZipformerModelConfig(o);
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(ced, ced);
+
+ SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
+
+ if (o.Has("debug") &&
+ (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
+ if (o.Get("debug").IsBoolean()) {
+ c.debug = o.Get("debug").As().Value();
+ } else {
+ c.debug = o.Get("debug").As().Int32Value();
+ }
+ }
+ SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
+
+ return c;
+}
+
+static Napi::External<SherpaOnnxAudioTagging> CreateAudioTaggingWrapper(
+ const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 1) {
+ std::ostringstream os;
+ os << "Expect only 1 argument. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsObject()) {
+ Napi::TypeError::New(env, "You should pass an object as the only argument.")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ Napi::Object o = info[0].As<Napi::Object>();
+
+ SherpaOnnxAudioTaggingConfig c;
+ memset(&c, 0, sizeof(c));
+ c.model = GetAudioTaggingModelConfig(o);
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(labels, labels);
+ SHERPA_ONNX_ASSIGN_ATTR_INT32(top_k, topK);
+
+ const SherpaOnnxAudioTagging *at = SherpaOnnxCreateAudioTagging(&c);
+
+ SHERPA_ONNX_DELETE_C_STR(c.model.zipformer.model);
+ SHERPA_ONNX_DELETE_C_STR(c.model.ced);
+ SHERPA_ONNX_DELETE_C_STR(c.model.provider);
+ SHERPA_ONNX_DELETE_C_STR(c.labels);
+
+ if (!at) {
+ Napi::TypeError::New(env, "Please check your config!")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ return Napi::External<SherpaOnnxAudioTagging>::New(
+ env, const_cast<SherpaOnnxAudioTagging *>(at),
+ [](Napi::Env env, SherpaOnnxAudioTagging *at) {
+ SherpaOnnxDestroyAudioTagging(at);
+ });
+}
+
+static Napi::External<SherpaOnnxOfflineStream>
+AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 1) {
+ std::ostringstream os;
+ os << "Expect only 1 argument. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an audio tagging pointer as the only argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ SherpaOnnxAudioTagging *at =
+ info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxAudioTaggingCreateOfflineStream(at);
+
+ return Napi::External<SherpaOnnxOfflineStream>::New(
+ env, const_cast<SherpaOnnxOfflineStream *>(stream),
+ [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
+ SherpaOnnxDestroyOfflineStream(stream);
+ });
+}
+
+static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 3) {
+ std::ostringstream os;
+ os << "Expect only 3 arguments. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an audio tagging pointer as the first argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[1].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an offline stream pointer as the second argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[2].IsNumber()) {
+ Napi::TypeError::New(env,
+ "You should pass an integer as the third argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ SherpaOnnxAudioTagging *at =
+ info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
+
+ SherpaOnnxOfflineStream *stream =
+ info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
+
+ int32_t top_k = info[2].As<Napi::Number>().Int32Value();
+
+ const SherpaOnnxAudioEvent *const *events =
+ SherpaOnnxAudioTaggingCompute(at, stream, top_k);
+
+ auto p = events;
+ int32_t k = 0;
+ while (p && *p) {
+ ++k;
+ ++p;
+ }
+
+ Napi::Array ans = Napi::Array::New(env, k);
+ for (uint32_t i = 0; i != k; ++i) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set(Napi::String::New(env, "name"),
+ Napi::String::New(env, events[i]->name));
+ obj.Set(Napi::String::New(env, "index"),
+ Napi::Number::New(env, events[i]->index));
+ obj.Set(Napi::String::New(env, "prob"),
+ Napi::Number::New(env, events[i]->prob));
+ ans[i] = obj;
+ }
+
+ SherpaOnnxAudioTaggingFreeResults(events);
+
+ return ans;
+}
+
+void InitAudioTagging(Napi::Env env, Napi::Object exports) {
+ exports.Set(Napi::String::New(env, "createAudioTagging"),
+ Napi::Function::New(env, CreateAudioTaggingWrapper));
+
+ exports.Set(Napi::String::New(env, "audioTaggingCreateOfflineStream"),
+ Napi::Function::New(env, AudioTaggingCreateOfflineStreamWrapper));
+
+ exports.Set(Napi::String::New(env, "audioTaggingCompute"),
+ Napi::Function::New(env, AudioTaggingComputeWrapper));
+}
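+
+// JavaScript/ArkTS side (a sketch only, not used by the build): the three
+// functions registered above can be called roughly as follows. The property
+// names match exactly what the wrappers above read; the file paths are
+// placeholders.
+//
+//   const at = createAudioTagging({
+//     model: {
+//       zipformer: {model: 'model.onnx'},  // placeholder path
+//       ced: '',
+//       numThreads: 1,
+//       debug: true,
+//       provider: 'cpu',
+//     },
+//     labels: 'labels.csv',                // placeholder path
+//     topK: 5,
+//   });
+//   const stream = audioTaggingCreateOfflineStream(at);
+//   // ... feed audio samples into `stream`, then:
+//   const events = audioTaggingCompute(at, stream, 5);
+//   // => [{name, index, prob}, ...]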
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md
new file mode 100644
index 0000000000..95744c221f
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md
@@ -0,0 +1,8 @@
+# Node
+
+[./c-api.h](./c-api.h) is a symbolic link to
+https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h
+
+If you are using Windows, then you need to manually replace this file with
+https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h
+since the symbolic link may not be available on Windows.
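+
+For example (an assumption for illustration, not part of the build scripts:
+fetch the raw file from GitHub while in this directory):
+
+```
+curl -SL -o c-api.h https://raw.githubusercontent.com/k2-fsa/sherpa-onnx/master/sherpa-onnx/c-api/c-api.h
+```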
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h
new file mode 120000
index 0000000000..d9c1b82e10
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h
@@ -0,0 +1 @@
+../../../../../../../../../sherpa-onnx/c-api/c-api.h
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc
new file mode 100644
index 0000000000..6562ef5a1a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc
@@ -0,0 +1,310 @@
+// scripts/node-addon-api/src/keyword-spotting.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+#include <sstream>