diff --git a/.github/scripts/node-addon/package-optional.json b/.github/scripts/node-addon/package-optional.json index b3c71f9dad..d2db2e1920 100644 --- a/.github/scripts/node-addon/package-optional.json +++ b/.github/scripts/node-addon/package-optional.json @@ -1,7 +1,7 @@ { "name": "sherpa-onnx-PLATFORM2-ARCH", "version": "SHERPA_ONNX_VERSION", - "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", + "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" @@ -16,8 +16,18 @@ "transcription", "real-time speech recognition", "without internet connection", + "locally", + "local", "embedded systems", "open source", + "diarization", + "speaker diarization", + "speaker recognition", + "speaker", + "speaker segmentation", + "speaker verification", + "spoken language identification", + "sherpa", "zipformer", "asr", "tts", @@ -30,13 +40,13 @@ "offline", "privacy", "open source", - "vad", - "speaker id", - "language id", - "node-addon-api", "streaming speech recognition", "speech", - "recognition" + "recognition", + "vad", + "node-addon-api", + "speaker id", + "language id" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/.github/scripts/node-addon/package.json b/.github/scripts/node-addon/package.json index 0444552fc3..bc2d89e89c 100644 --- a/.github/scripts/node-addon/package.json +++ b/.github/scripts/node-addon/package.json @@ -1,7 +1,7 @@ { "name": "sherpa-onnx-node", "version": "SHERPA_ONNX_VERSION", - "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", + "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", "main": "sherpa-onnx.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" @@ -16,8 +16,18 @@ 
"transcription", "real-time speech recognition", "without internet connection", + "locally", + "local", "embedded systems", "open source", + "diarization", + "speaker diarization", + "speaker recognition", + "speaker", + "speaker segmentation", + "speaker verification", + "spoken language identification", + "sherpa", "zipformer", "asr", "tts", @@ -30,13 +40,13 @@ "offline", "privacy", "open source", - "vad", - "speaker id", - "language id", - "node-addon-api", "streaming speech recognition", "speech", - "recognition" + "recognition", + "vad", + "node-addon-api", + "speaker id", + "language id" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/.github/scripts/test-cxx-api.sh b/.github/scripts/test-cxx-api.sh new file mode 100755 index 0000000000..aedf161337 --- /dev/null +++ b/.github/scripts/test-cxx-api.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE" +echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE" +echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE" +echo "PATH: $PATH" + +log "------------------------------------------------------------" +log "Test streaming zipformer CXX API" +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +$CXX_STREAMING_ZIPFORMER_EXE +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + +log "------------------------------------------------------------" +log "Test Whisper CXX API" +log "------------------------------------------------------------" +curl -SL 
-O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +rm sherpa-onnx-whisper-tiny.en.tar.bz2 +$CXX_WHISPER_EXE +rm -rf sherpa-onnx-whisper-tiny.en + +log "------------------------------------------------------------" +log "Test SenseVoice CXX API" +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + +$CXX_SENSE_VOICE_EXE +rm -rf sherpa-onnx-sense-voice-* diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh index 0aff2085e7..27199ae9f7 100755 --- a/.github/scripts/test-dart.sh +++ b/.github/scripts/test-dart.sh @@ -4,6 +4,38 @@ set -ex cd dart-api-examples +pushd tts + +echo '----------matcha tts----------' +./run-kokoro-zh-en.sh +./run-kokoro-en.sh +./run-matcha-zh.sh +./run-matcha-en.sh +ls -lh *.wav +rm -rf matcha-icefall-* +rm *.onnx + +echo '----------piper tts----------' +./run-piper.sh +rm -rf vits-piper-* + +echo '----------coqui tts----------' +./run-coqui.sh +rm -rf vits-coqui-* + +echo '----------zh tts----------' +./run-vits-zh.sh +rm -rf sherpa-onnx-* + +ls -lh *.wav + +popd # tts + +pushd speaker-diarization +echo '----------speaker diarization----------' +./run.sh +popd + pushd speaker-identification echo '----------3d speaker----------' ./run-3d-speaker.sh @@ -31,6 +63,10 @@ echo "----zipformer transducer----" ./run-zipformer-transducer.sh rm -rf sherpa-onnx-* +echo "----moonshine----" +./run-moonshine.sh +rm -rf sherpa-onnx-* + echo "----whisper----" ./run-whisper.sh rm -rf sherpa-onnx-* @@ -72,6 +108,10 @@ echo '----------TeleSpeech CTC----------' ./run-telespeech-ctc.sh rm -rf sherpa-onnx-* +echo '----------moonshine----------' +./run-moonshine.sh 
+rm -rf sherpa-onnx-* + echo '----------whisper----------' ./run-whisper.sh rm -rf sherpa-onnx-* @@ -93,22 +133,6 @@ rm -rf sherpa-onnx-* popd # non-streaming-asr -pushd tts - -echo '----------piper tts----------' -./run-piper.sh -rm -rf vits-piper-* - -echo '----------coqui tts----------' -./run-coqui.sh -rm -rf vits-coqui-* - -echo '----------zh tts----------' -./run-zh.sh -rm -rf sherpa-onnx-* - -popd # tts - pushd streaming-asr echo '----------streaming zipformer ctc HLG----------' diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index c397fc0cdf..aa41ad985d 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,41 @@ cd dotnet-examples/ -cd ./offline-decode-files +cd ./kokoro-tts +./run-kokoro.sh +ls -lh + +cd ../offline-tts +./run-matcha-zh.sh +ls -lh *.wav +./run-matcha-en.sh +ls -lh *.wav +./run-aishell3.sh +ls -lh *.wav +./run-piper.sh +ls -lh *.wav +./run-hf-fanchen.sh +ls -lh *.wav +ls -lh + +pushd ../.. + +mkdir tts + +cp -v dotnet-examples/kokoro-tts/*.wav ./tts +cp -v dotnet-examples/offline-tts/*.wav ./tts +popd + +cd ../offline-speaker-diarization +./run.sh +rm -rfv *.onnx +rm -fv *.wav +rm -rfv sherpa-onnx-pyannote-* + +cd ../offline-decode-files +./run-moonshine.sh +rm -rf sherpa-onnx-* + ./run-sense-voice-ctc.sh rm -rf sherpa-onnx-* @@ -67,14 +101,4 @@ cd ../spoken-language-identification ./run.sh rm -rf sherpa-onnx-* -cd ../offline-tts -./run-aishell3.sh -./run-piper.sh -./run-hf-fanchen.sh -ls -lh - -cd ../.. 
- -mkdir tts -cp dotnet-examples/offline-tts/*.wav ./tts diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index a46e2de8ed..53db04d739 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -10,7 +10,34 @@ arch=$(node -p "require('os').arch()") platform=$(node -p "require('os').platform()") node_version=$(node -p "process.versions.node.split('.')[0]") -echo "----------non-streaming asr + vad----------" +echo "----------non-streaming asr moonshine + vad----------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + +node ./test_vad_with_non_streaming_asr_moonshine.js +rm -rf sherpa-onnx-* +rm *.wav +rm *.onnx + +echo "----------non-streaming speaker diarization----------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +node ./test_offline_speaker_diarization.js + +rm -rfv *.onnx *.wav sherpa-onnx-pyannote-* + +echo "----------non-streaming asr whisper + vad----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 rm 
sherpa-onnx-whisper-tiny.en.tar.bz2 @@ -58,6 +85,41 @@ fi echo "----------tts----------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 +tar xf kokoro-multi-lang-v1_0.tar.bz2 +rm kokoro-multi-lang-v1_0.tar.bz2 + +node ./test_tts_non_streaming_kokoro_zh_en.js +ls -lh *.wav +rm -rf kokoro-multi-lang-v1_0 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 +tar xf kokoro-en-v0_19.tar.bz2 +rm kokoro-en-v0_19.tar.bz2 + +node ./test_tts_non_streaming_kokoro_en.js +ls -lh *.wav +rm -rf kokoro-en-v0_19 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +rm matcha-icefall-en_US-ljspeech.tar.bz2 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +node ./test_tts_non_streaming_matcha_icefall_en.js +rm hifigan_v2.onnx +rm -rf matcha-icefall-en_US-ljspeech + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 +tar xvf matcha-icefall-zh-baker.tar.bz2 +rm matcha-icefall-zh-baker.tar.bz2 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +node ./test_tts_non_streaming_matcha_icefall_zh.js +rm hifigan_v2.onnx +rm -rf matcha-icefall-zh-baker +ls -lh *.wav + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2 tar xf vits-piper-en_GB-cori-medium.tar.bz2 rm vits-piper-en_GB-cori-medium.tar.bz2 @@ -204,6 +266,11 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2 node ./test_asr_non_streaming_whisper.js rm -rf sherpa-onnx-whisper-tiny.en +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +rm 
sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +node ./test_asr_non_streaming_moonshine.js +rm -rf sherpa-onnx-* ls -lh diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index c41a0de658..536310af7b 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -9,6 +9,94 @@ git status ls -lh ls -lh node_modules +# offline tts +# +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 +tar xf kokoro-multi-lang-v1_0.tar.bz2 +rm kokoro-multi-lang-v1_0.tar.bz2 + +node ./test-offline-tts-kokoro-zh-en.js +ls -lh *.wav +rm -rf kokoro-multi-lang-v1_0 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 +tar xf kokoro-en-v0_19.tar.bz2 +rm kokoro-en-v0_19.tar.bz2 + +node ./test-offline-tts-kokoro-en.js +rm -rf kokoro-en-v0_19 + +ls -lh + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 +tar xvf matcha-icefall-zh-baker.tar.bz2 +rm matcha-icefall-zh-baker.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +node ./test-offline-tts-matcha-zh.js + +rm -rf matcha-icefall-zh-baker +rm hifigan_v2.onnx + +echo "---" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +rm matcha-icefall-en_US-ljspeech.tar.bz2 + +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +node ./test-offline-tts-matcha-en.js + +rm -rf matcha-icefall-en_US-ljspeech +rm hifigan_v2.onnx + +echo "---" + +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 +tar xf vits-piper-en_US-amy-low.tar.bz2 +node ./test-offline-tts-vits-en.js +rm -rf vits-piper-en_US-amy-low* + +echo "---" + +curl -LS -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 +tar xvf vits-icefall-zh-aishell3.tar.bz2 +node ./test-offline-tts-vits-zh.js +rm -rf vits-icefall-zh-aishell3* + +ls -lh *.wav + +echo '-----speaker diarization----------' +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +node ./test-offline-speaker-diarization.js +rm -rfv *.wav *.onnx sherpa-onnx-pyannote-* + +echo '-----vad+moonshine----------' + +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +rm sherpa-onnx-whisper-tiny.en.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +node ./test-vad-with-non-streaming-asr-whisper.js +rm Obama.wav +rm silero_vad.onnx +rm -rf sherpa-onnx-moonshine-* + echo '-----vad+whisper----------' curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 @@ -78,6 +166,13 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2 node ./test-offline-whisper.js rm -rf sherpa-onnx-whisper-tiny.en +curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + +node ./test-offline-moonshine.js +rm -rf sherpa-onnx-moonshine-* + # online asr curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 @@ -111,15 +206,3 @@ tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 node ./test-online-zipformer2-ctc-hlg.js rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 - -# offline tts - -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 -tar xf vits-piper-en_US-amy-low.tar.bz2 -node ./test-offline-tts-en.js -rm -rf vits-piper-en_US-amy-low* - -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 -tar xvf vits-icefall-zh-aishell3.tar.bz2 -node ./test-offline-tts-zh.js -rm -rf vits-icefall-zh-aishell3* diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh index 57208e9da2..f85b585398 100755 --- a/.github/scripts/test-offline-ctc.sh +++ b/.github/scripts/test-offline-ctc.sh @@ -15,6 +15,21 @@ echo "PATH: $PATH" which $EXE +log "------------------------------------------------------------" +log "Run NeMo GigaAM Russian models" +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 +tar xvf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 +rm sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 + +$EXE \ + --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \ + 
--tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt \ + --debug=1 \ + ./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav + +rm -rf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24 + log "------------------------------------------------------------" log "Run SenseVoice models" log "------------------------------------------------------------" diff --git a/.github/scripts/test-offline-moonshine.sh b/.github/scripts/test-offline-moonshine.sh new file mode 100755 index 0000000000..1768e82ecd --- /dev/null +++ b/.github/scripts/test-offline-moonshine.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -e + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +export GIT_CLONE_PROTECTION_ACTIVE=false + +echo "EXE is $EXE" +echo "PATH: $PATH" + +which $EXE + +names=( +tiny +base +) + +for name in ${names[@]}; do + log "------------------------------------------------------------" + log "Run $name" + log "------------------------------------------------------------" + + repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-$name.tar.bz2 + repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-$name-en-int8.tar.bz2 + curl -SL -O $repo_url + tar xvf sherpa-onnx-moonshine-$name-en-int8.tar.bz2 + rm sherpa-onnx-moonshine-$name-en-int8.tar.bz2 + repo=sherpa-onnx-moonshine-$name-en-int8 + log "Start testing ${repo_url}" + + log "test int8 onnx" + + time $EXE \ + --moonshine-preprocessor=$repo/preprocess.onnx \ + --moonshine-encoder=$repo/encode.int8.onnx \ + --moonshine-uncached-decoder=$repo/uncached_decode.int8.onnx \ + --moonshine-cached-decoder=$repo/cached_decode.int8.onnx \ + --tokens=$repo/tokens.txt \ + --num-threads=2 \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/8k.wav + + rm -rf $repo +done diff --git 
a/.github/scripts/test-offline-tts.sh b/.github/scripts/test-offline-tts.sh index d3d35df2cb..baa2b37bb9 100755 --- a/.github/scripts/test-offline-tts.sh +++ b/.github/scripts/test-offline-tts.sh @@ -18,6 +18,87 @@ which $EXE # test waves are saved in ./tts mkdir ./tts +log "------------------------------------------------------------" +log "kokoro-en-v0_19" +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 +tar xf kokoro-en-v0_19.tar.bz2 +rm kokoro-en-v0_19.tar.bz2 + +# mapping of sid to voice name +# 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam +# 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis + +for sid in $(seq 0 10); do + $EXE \ + --debug=1 \ + --kokoro-model=./kokoro-en-v0_19/model.onnx \ + --kokoro-voices=./kokoro-en-v0_19/voices.bin \ + --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \ + --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \ + --num-threads=2 \ + --sid=$sid \ + --output-filename="./tts/kokoro-$sid.wav" \ + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar." 
+done +rm -rf kokoro-en-v0_19 + +log "------------------------------------------------------------" +log "matcha-icefall-en_US-ljspeech" +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +rm matcha-icefall-en_US-ljspeech.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +$EXE \ + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ + --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ + --num-threads=2 \ + --output-filename=./tts/matcha-ljspeech-1.wav \ + --debug=1 \ + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 
+ +rm hifigan_v2.onnx +rm -rf matcha-icefall-en_US-ljspeech + +log "------------------------------------------------------------" +log "matcha-icefall-zh-baker" +log "------------------------------------------------------------" +curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 +tar xvf matcha-icefall-zh-baker.tar.bz2 +rm matcha-icefall-zh-baker.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +$EXE \ + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ + --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ + --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ + --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ + --num-threads=2 \ + --debug=1 \ + --output-filename=./tts/matcha-baker-zh-1.wav \ + '小米的使命是,始终坚持做"感动人心、价格厚道"的好产品,让全球每个人都能享受科技带来的美好生活' + +$EXE \ + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ + --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ + --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ + --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ + --num-threads=2 \ + --debug=1 \ + --output-filename=./tts/matcha-baker-zh-2.wav \ + "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" + +rm hifigan_v2.onnx +rm -rf matcha-icefall-zh-baker + log "------------------------------------------------------------" log "vits-piper-en_US-amy-low" log "------------------------------------------------------------" diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh index de7297f2c3..dd4da51207 100755 --- a/.github/scripts/test-python.sh +++ b/.github/scripts/test-python.sh @@ -8,6 +8,52 @@ log() { echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" } +log "test offline zipformer (byte-level bpe, Chinese+English)" 
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2 +tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2 +rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2 + +repo=sherpa-onnx-zipformer-zh-en-2023-11-22 + +./python-api-examples/offline-decode-files.py \ + --tokens=$repo/tokens.txt \ + --encoder=$repo/encoder-epoch-34-avg-19.int8.onnx \ + --decoder=$repo/decoder-epoch-34-avg-19.onnx \ + --joiner=$repo/joiner-epoch-34-avg-19.int8.onnx \ + --num-threads=2 \ + --decoding-method=greedy_search \ + --debug=true \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + +rm -rf sherpa-onnx-zipformer-zh-en-2023-11-22 + +log "test offline Moonshine" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + +python3 ./python-api-examples/offline-moonshine-decode-files.py + +rm -rf sherpa-onnx-moonshine-tiny-en-int8 + +log "test offline speaker diarization" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +python3 ./python-api-examples/offline-speaker-diarization.py + +rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0 + + log "test_clustering" pushd /tmp/ mkdir test-cluster @@ -221,6 +267,87 @@ log "Offline TTS test" # test waves are saved in ./tts mkdir ./tts +log "kokoro-multi-lang-v1_0 test" + +curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 +tar xf kokoro-multi-lang-v1_0.tar.bz2 +rm kokoro-multi-lang-v1_0.tar.bz2 + +python3 ./python-api-examples/offline-tts.py \ + --debug=1 \ + --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \ + --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \ + --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \ + --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \ + --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \ + --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \ + --num-threads=2 \ + --sid=18 \ + --output-filename="./tts/kokoro-18-zh-en.wav" \ + "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" + +rm -rf kokoro-multi-lang-v1_0 + +log "kokoro-en-v0_19 test" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 +tar xf kokoro-en-v0_19.tar.bz2 +rm kokoro-en-v0_19.tar.bz2 + +python3 ./python-api-examples/offline-tts.py \ + --debug=1 \ + --kokoro-model=./kokoro-en-v0_19/model.onnx \ + --kokoro-voices=./kokoro-en-v0_19/voices.bin \ + --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \ + --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \ + --num-threads=2 \ + --sid=10 \ + --output-filename="./tts/kokoro-10.wav" \ + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar." 
+ +rm -rf kokoro-en-v0_19 + +log "matcha-ljspeech-en test" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 +rm matcha-icefall-en_US-ljspeech.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +python3 ./python-api-examples/offline-tts.py \ + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ + --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ + --output-filename=./tts/test-matcha-ljspeech-en.wav \ + --num-threads=2 \ + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." + +rm hifigan_v2.onnx +rm -rf matcha-icefall-en_US-ljspeech + +log "matcha-baker-zh test" + +curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 +tar xvf matcha-icefall-zh-baker.tar.bz2 +rm matcha-icefall-zh-baker.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + +python3 ./python-api-examples/offline-tts.py \ + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ + --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ + --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ + --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ + --output-filename=./tts/test-matcha-baker-zh.wav \ + "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" + +rm -rf matcha-icefall-zh-baker +rm hifigan_v2.onnx + log 
"vits-ljs test" curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx @@ -468,53 +595,19 @@ echo "sherpa_onnx version: $sherpa_onnx_version" pwd ls -lh -repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01 -log "Start testing ${repo}" - -pushd $dir -curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz -tar xf sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz -rm sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz -popd - -repo=$dir/$repo -ls -lh $repo - -python3 ./python-api-examples/keyword-spotter.py \ - --tokens=$repo/tokens.txt \ - --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \ - --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \ - --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \ - --keywords-file=$repo/test_wavs/test_keywords.txt \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav - -rm -rf $repo - if [[ x$OS != x'windows-latest' ]]; then echo "OS: $OS" repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 log "Start testing ${repo}" - pushd $dir curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz - popd - repo=$dir/$repo ls -lh $repo - python3 ./python-api-examples/keyword-spotter.py \ - --tokens=$repo/tokens.txt \ - --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \ - --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \ - --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \ - --keywords-file=$repo/test_wavs/test_keywords.txt \ - $repo/test_wavs/3.wav \ - $repo/test_wavs/4.wav \ - $repo/test_wavs/5.wav + python3 ./python-api-examples/keyword-spotter.py python3 sherpa-onnx/python/tests/test_keyword_spotter.py --verbose 
diff --git a/.github/scripts/test-speaker-diarization.sh b/.github/scripts/test-speaker-diarization.sh new file mode 100755 index 0000000000..6d7b2effd0 --- /dev/null +++ b/.github/scripts/test-speaker-diarization.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "EXE is $EXE" +echo "PATH: $PATH" + +which $EXE + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +log "specify number of clusters" +$EXE \ + --clustering.num-clusters=4 \ + --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \ + --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \ + ./0-four-speakers-zh.wav + +log "specify threshold for clustering" + +$EXE \ + --clustering.cluster-threshold=0.90 \ + --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \ + --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \ + ./0-four-speakers-zh.wav + +rm -rf sherpa-onnx-pyannote-* +rm -fv *.onnx +rm -fv *.wav diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh index 18c9bed418..65fe4588aa 100755 --- a/.github/scripts/test-swift.sh +++ b/.github/scripts/test-swift.sh @@ -7,6 +7,31 @@ echo "pwd: $PWD" cd swift-api-examples ls -lh +./run-tts-vits.sh +ls -lh +rm -rf vits-piper-* + +./run-tts-kokoro-zh-en.sh +ls -lh +rm -rf kokoro-multi-* + 
+./run-tts-kokoro-en.sh +ls -lh +rm -rf kokoro-en-* + +./run-tts-matcha-zh.sh +ls -lh +rm -rf matcha-icefall-* + +./run-tts-matcha-en.sh +ls -lh +rm -rf matcha-icefall-* + +./run-speaker-diarization.sh +rm -rf *.onnx +rm -rf sherpa-onnx-pyannote-segmentation-3-0 +rm -fv *.wav + ./run-add-punctuations.sh rm ./add-punctuations rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 @@ -33,8 +58,9 @@ popd ls -lh /Users/fangjun/Desktop cat /Users/fangjun/Desktop/Obama.srt -./run-tts.sh -ls -lh +rm -rf sherpa-onnx-whisper* +rm -f *.onnx +rm /Users/fangjun/Desktop/Obama.wav ./run-decode-file.sh rm decode-file @@ -43,5 +69,4 @@ sed -i.bak '20d' ./decode-file.swift ./run-decode-file-non-streaming.sh - ls -lh diff --git a/.github/workflows/aarch64-linux-gnu-shared.yaml b/.github/workflows/aarch64-linux-gnu-shared.yaml index 5e82d9b3ad..1851645251 100644 --- a/.github/workflows/aarch64-linux-gnu-shared.yaml +++ b/.github/workflows/aarch64-linux-gnu-shared.yaml @@ -9,7 +9,6 @@ on: - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/aarch64-linux-gnu-shared.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/aarch64-linux-gnu-shared.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -34,11 +32,20 @@ concurrency: jobs: aarch64_linux_gnu_shared: runs-on: ${{ matrix.os }} - name: aarch64 shared lib test + name: aarch64 shared GPU ${{ matrix.gpu }} ${{ matrix.onnxruntime_version }} strategy: fail-fast: false matrix: - os: [ubuntu-latest] + include: + - os: ubuntu-latest + gpu: ON + onnxruntime_version: "1.11.0" + - os: ubuntu-latest + gpu: ON + onnxruntime_version: "1.16.0" + - os: ubuntu-latest + gpu: OFF + onnxruntime_version: "" steps: - uses: actions/checkout@v4 @@ -61,7 +68,7 @@ jobs: if: steps.cache-qemu.outputs.cache-hit != 'true' run: | sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build + 
sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev. - name: checkout-qemu if: steps.cache-qemu.outputs.cache-hit != 'true' @@ -79,15 +86,24 @@ jobs: make -j2 make install - - name: cache-toolchain - id: cache-toolchain + - name: cache-toolchain (CPU) + if: matrix.gpu == 'OFF' + id: cache-toolchain-cpu uses: actions/cache@v4 with: path: toolchain key: gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz - - name: Download toolchain - if: steps.cache-toolchain.outputs.cache-hit != 'true' + - name: cache-toolchain (GPU) + if: matrix.gpu == 'ON' + id: cache-toolchain-gpu + uses: actions/cache@v4 + with: + path: toolchain + key: gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz + + - name: Download toolchain (CPU, gcc 7.5) + if: steps.cache-toolchain-cpu.outputs.cache-hit != 'true' && matrix.gpu == 'OFF' shell: bash run: | wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz @@ -95,6 +111,15 @@ jobs: mkdir $GITHUB_WORKSPACE/toolchain tar xf ./gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain + - name: Download toolchain (GPU, gcc 10.3) + if: steps.cache-toolchain-gpu.outputs.cache-hit != 'true' && matrix.gpu == 'ON' + shell: bash + run: | + wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz + + mkdir $GITHUB_WORKSPACE/toolchain + tar xf ./gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain + - name: Set environment variable if: steps.cache-build-result.outputs.cache-hit != 'true' shell: bash @@ -103,19 +128,31 @@ jobs: echo "$GITHUB_WORKSPACE/bin" >> "$GITHUB_PATH" ls -lh "$GITHUB_WORKSPACE/toolchain/bin" - echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV" - echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV" + if [[ ${{ matrix.gpu }} == OFF ]]; then + 
echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV" + echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV" + else + echo "CC=aarch64-none-linux-gnu-gcc" >> "$GITHUB_ENV" + echo "CXX=aarch64-none-linux-gnu-g++" >> "$GITHUB_ENV" + fi - name: Display toolchain info shell: bash run: | - aarch64-linux-gnu-gcc --version + if [[ ${{ matrix.gpu }} == OFF ]]; then + which aarch64-linux-gnu-gcc + aarch64-linux-gnu-gcc --version + else + which aarch64-none-linux-gnu-gcc + aarch64-none-linux-gnu-gcc --version + fi - name: Display qemu-aarch64 -h shell: bash run: | export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc + export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc qemu-aarch64 -h - name: build aarch64-linux-gnu @@ -127,6 +164,8 @@ jobs: cmake --version export BUILD_SHARED_LIBS=ON + export SHERPA_ONNX_ENABLE_GPU=${{ matrix.gpu }} + export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=${{ matrix.onnxruntime_version }} ./build-aarch64-linux-gnu.sh @@ -140,7 +179,11 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH - export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc + if [[ ${{ matrix.gpu }} == OFF ]]; then + export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc + else + export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc + fi ls -lh ./build-aarch64-linux-gnu/bin @@ -151,11 +194,20 @@ jobs: - name: Copy files shell: bash run: | - aarch64-linux-gnu-strip --version + if [[ ${{ matrix.gpu }} == OFF ]]; then + aarch64-linux-gnu-strip --version + else + aarch64-none-linux-gnu-strip --version + fi SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-shared + if [[ ${{ matrix.gpu }} == OFF ]]; then + dst=${dst}-cpu + else + 
dst=${dst}-gpu-onnxruntime-${{ matrix.onnxruntime_version }} + fi mkdir $dst cp -a build-aarch64-linux-gnu/install/bin $dst/ @@ -166,7 +218,11 @@ jobs: ls -lh $dst/bin/ echo "strip" - aarch64-linux-gnu-strip $dst/bin/* + if [[ ${{ matrix.gpu }} == OFF ]]; then + aarch64-linux-gnu-strip $dst/bin/* + else + aarch64-none-linux-gnu-strip $dst/bin/* + fi tree $dst @@ -174,8 +230,8 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: sherpa-onnx-linux-aarch64-shared - path: sherpa-onnx-*linux-aarch64-shared.tar.bz2 + name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }}-onnxruntime-${{ matrix.onnxruntime_version }} + path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2 # https://huggingface.co/docs/hub/spaces-github-actions - name: Publish to huggingface @@ -193,12 +249,12 @@ jobs: rm -rf huggingface export GIT_CLONE_PROTECTION_ACTIVE=false - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface mkdir -p aarch64 - cp -v ../sherpa-onnx-*-shared.tar.bz2 ./aarch64 + cp -v ../sherpa-onnx-*-shared*.tar.bz2 ./aarch64 git status git lfs track "*.bz2" diff --git a/.github/workflows/aarch64-linux-gnu-static.yaml b/.github/workflows/aarch64-linux-gnu-static.yaml index 765e2422f3..66ce6ec244 100644 --- a/.github/workflows/aarch64-linux-gnu-static.yaml +++ b/.github/workflows/aarch64-linux-gnu-static.yaml @@ -9,7 +9,6 @@ on: - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/aarch64-linux-gnu-static.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/aarch64-linux-gnu-static.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -61,7 +59,7 @@ jobs: if: steps.cache-qemu.outputs.cache-hit != 'true' run: | sudo apt-get update - sudo apt-get install autoconf automake 
autotools-dev ninja-build + sudo apt-get install build-essential zlib1g-dev pkg-config libglib2.0-dev binutils-dev libboost-all-dev autoconf libtool libssl-dev libpixman-1-dev ninja-build - name: checkout-qemu if: steps.cache-qemu.outputs.cache-hit != 'true' @@ -184,7 +182,7 @@ jobs: rm -rf huggingface export GIT_CLONE_PROTECTION_ACTIVE=false - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface mkdir -p aarch64 diff --git a/.github/workflows/add-new-asr-models.yaml b/.github/workflows/add-new-asr-models.yaml new file mode 100644 index 0000000000..6bd2230f15 --- /dev/null +++ b/.github/workflows/add-new-asr-models.yaml @@ -0,0 +1,61 @@ +name: add-new-asr-models + +on: + # push: + # branches: + # - new-asr-models + workflow_dispatch: + +concurrency: + group: add-new-asr-models-${{ github.ref }} + cancel-in-progress: true + +jobs: + add-new-asr-models: + runs-on: ${{ matrix.os }} + name: New asr models + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Download icefall-asr-zipformer-multi-zh-en-2023-11-22 + shell: bash + run: | + d=sherpa-onnx-zipformer-zh-en-2023-11-22 + mkdir $d + pushd $d + + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/tokens.txt + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/bbpe.model + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/decoder-epoch-34-avg-19.onnx + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.int8.onnx + wget -q 
https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.onnx + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.int8.onnx + wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.onnx + + mkdir test_wavs + cd test_wavs + wget -O 0.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav + wget -O 1.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav + + wget -O 2.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav + popd + tar cvjf $d.tar.bz2 $d + ls -lh $d + rm -rf $d + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: asr-models diff --git a/.github/workflows/android-static.yaml b/.github/workflows/android-static.yaml new file mode 100644 index 0000000000..7dad8128be --- /dev/null +++ b/.github/workflows/android-static.yaml @@ -0,0 +1,296 @@ +# static means we link onnxruntime statically +# but we still have libsherpa-onnx-jni.so +name: android-static + +on: + push: + branches: + - master + - android-link-onnxruntime-statically + paths: + - '.github/workflows/android-static.yaml' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/jni/*' + - 'build-android*.sh' + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + pull_request: + branches: + - master + paths: + - '.github/workflows/android-static.yaml' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/jni/*' + - 'build-android*.sh' + + workflow_dispatch: + +concurrency: 
+ group: android-static-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-android-static-libs: + name: Android static libs + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android-jni-static + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: build android arm64-v8a + shell: bash + run: | + export BUILD_SHARED_LIBS=OFF + + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-android-arm64-v8a.sh + mkdir -p jniLibs/arm64-v8a/ + cp -v ./build-android-arm64-v8a-static/install/lib/*.so ./jniLibs/arm64-v8a/ + rm -rf ./build-android-arm64-v8a-static/ + + - name: build android armv7-eabi + shell: bash + run: | + export BUILD_SHARED_LIBS=OFF + + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-android-armv7-eabi.sh + mkdir -p ./jniLibs/armeabi-v7a/ + cp -v ./build-android-armv7-eabi-static/install/lib/*.so ./jniLibs/armeabi-v7a/ + rm -rf ./build-android-armv7-eabi-static + + - name: build android x86_64 + shell: bash + run: | + export BUILD_SHARED_LIBS=OFF + + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-android-x86-64.sh + mkdir -p ./jniLibs/x86_64 + cp -v ./build-android-x86-64-static/install/lib/*.so ./jniLibs/x86_64 + rm -rf ./build-android-x86-64-static + + - name: build android x86 + shell: bash + run: | + export BUILD_SHARED_LIBS=OFF + + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export 
PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-android-x86.sh + mkdir -p ./jniLibs/x86 + cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86 + rm -rf ./build-android-x86 + + - name: Copy files + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android-static-link-onnxruntime.tar.bz2 + + tar cjvf $filename ./jniLibs + + ls -lh + + - uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-android-libs-static + path: ./jniLibs + + # https://huggingface.co/docs/hub/spaces-github-actions + - name: Publish to huggingface + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + du -h -d1 . + ls -lh + + rm -rf huggingface + export GIT_CLONE_PROTECTION_ACTIVE=false + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + + cd huggingface + + cp -v ../sherpa-onnx-*-android*.tar.bz2 ./ + + git status + git lfs track "*.bz2" + + git add . 
+ + git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2" + + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main + + - name: Release android libs + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: sherpa-onnx-*-android*.tar.bz2 + + build-android-aar-static: + needs: [build-android-static-libs] + name: Android AAR + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Retrieve artifact + uses: actions/download-artifact@v4 + with: + name: sherpa-onnx-android-libs-static + path: /tmp/jniLibs + + - name: Show jni libs + shell: bash + run: | + ls -lh /tmp/jniLibs + + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86 + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64 + # + - name: Copy libs + shell: bash + run: | + for arch in arm64-v8a armeabi-v7a x86 x86_64; do + cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/ + done + + - name: Check libs + shell: bash + run: | + ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/* + + - name: Build aar + shell: bash + run: | + cd android/SherpaOnnxAar + + ./gradlew :sherpa_onnx:assembleRelease + + - name: Display aar + shell: bash + run: | + cd android/SherpaOnnxAar + + ls -lh 
./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar + cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../ + + - name: Rename aar + shell: bash + run: | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + mv sherpa_onnx-release.aar sherpa-onnx-static-link-onnxruntime-${SHERPA_ONNX_VERSION}.aar + + - uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-android-aar-static + path: ./*.aar + + # https://huggingface.co/docs/hub/spaces-github-actions + - name: Publish to huggingface + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + du -h -d1 . + ls -lh + + rm -rf huggingface + export GIT_CLONE_PROTECTION_ACTIVE=false + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + + cd huggingface + dst=android/aar + mkdir -p $dst + + cp -v ../*.aar $dst + + git status + git lfs track "*.aar" + + git add . 
+ + git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar" + + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main + + - name: Release android aar + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.aar diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml index 35dfd6b26f..b7da9b8a60 100644 --- a/.github/workflows/android.yaml +++ b/.github/workflows/android.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/android.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/jni/*' @@ -18,7 +17,6 @@ on: - master paths: - '.github/workflows/android.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/jni/*' @@ -32,7 +30,7 @@ concurrency: jobs: build-android-libs: - name: Android for ${{ matrix.os }} + name: Android libs runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -44,6 +42,11 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android-jni + - name: Display NDK HOME shell: bash run: | @@ -53,37 +56,57 @@ jobs: - name: build android arm64-v8a shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + export SHERPA_ONNX_ENABLE_C_API=ON ./build-android-arm64-v8a.sh mkdir -p jniLibs/arm64-v8a/ cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/ + cp -v ./build-android-arm64-v8a/install/lib/README.md ./jniLibs/arm64-v8a/ rm -rf ./build-android-arm64-v8a/ - name: build android armv7-eabi shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export 
ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + export SHERPA_ONNX_ENABLE_C_API=ON ./build-android-armv7-eabi.sh mkdir -p ./jniLibs/armeabi-v7a/ cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/ + cp -v ./build-android-armv7-eabi/install/lib/README.md ./jniLibs/armeabi-v7a/ rm -rf ./build-android-armv7-eabi - name: build android x86_64 shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + export SHERPA_ONNX_ENABLE_C_API=ON ./build-android-x86-64.sh mkdir -p ./jniLibs/x86_64 cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64 + cp -v ./build-android-x86-64/install/lib/README.md ./jniLibs/x86_64 rm -rf ./build-android-x86-64 - name: build android x86 shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + export SHERPA_ONNX_ENABLE_C_API=ON ./build-android-x86.sh mkdir -p ./jniLibs/x86 cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86 + cp -v ./build-android-x86/install/lib/README.md ./jniLibs/x86 rm -rf ./build-android-x86 - name: Copy files @@ -121,7 +144,7 @@ jobs: rm -rf huggingface export GIT_CLONE_PROTECTION_ACTIVE=false - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface @@ -143,3 +166,129 @@ jobs: file_glob: true overwrite: true file: sherpa-onnx-*-android.tar.bz2 + + build-android-aar: + needs: [build-android-libs] + name: Android AAR + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 
'Supported distributions' for available options + java-version: '21' + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Retrieve artifact + uses: actions/download-artifact@v4 + with: + name: sherpa-onnx-android-libs + path: /tmp/jniLibs + + - name: Show jni libs + shell: bash + run: | + ls -lh /tmp/jniLibs + + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86 + # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64 + # + - name: Copy libs + shell: bash + run: | + for arch in arm64-v8a armeabi-v7a x86 x86_64; do + cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/ + done + + - name: Check libs + shell: bash + run: | + ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/* + + - name: Build aar + shell: bash + run: | + cd android/SherpaOnnxAar + + ./gradlew :sherpa_onnx:assembleRelease + + - name: Display aar + shell: bash + run: | + cd android/SherpaOnnxAar + + ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar + cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../ + + + - name: Rename aar + shell: bash + run: | + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + mv sherpa_onnx-release.aar sherpa-onnx-${SHERPA_ONNX_VERSION}.aar + + - uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-android-aar + path: ./*.aar + + # https://huggingface.co/docs/hub/spaces-github-actions + - name: Publish to huggingface + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + 
max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + du -h -d1 . + ls -lh + + rm -rf huggingface + export GIT_CLONE_PROTECTION_ACTIVE=false + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + + cd huggingface + dst=android/aar + mkdir -p $dst + + cp -v ../*.aar $dst + + git status + git lfs track "*.aar" + + git add . + + git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar" + + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main + + - name: Release android aar + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.aar diff --git a/.github/workflows/apk-asr-2pass.yaml b/.github/workflows/apk-asr-2pass.yaml index bbe61060a8..72885db45e 100644 --- a/.github/workflows/apk-asr-2pass.yaml +++ b/.github/workflows/apk-asr-2pass.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: ["2"] - index: ["0", "1"] + total: ["4"] + index: ["0", "1", "2", "3"] steps: - uses: actions/checkout@v4 @@ -163,7 +163,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-asr.yaml b/.github/workflows/apk-asr.yaml index fc1cd1f5d8..e49b179c8b 100644 --- a/.github/workflows/apk-asr.yaml +++ b/.github/workflows/apk-asr.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: 
["3"] - index: ["0", "1", "2"] + total: ["6"] + index: ["0", "1", "2", "3", "4", "5"] steps: - uses: actions/checkout@v4 @@ -163,7 +163,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-audio-tagging-wearos.yaml b/.github/workflows/apk-audio-tagging-wearos.yaml index 0ed8230769..bfe9f9ac7c 100644 --- a/.github/workflows/apk-audio-tagging-wearos.yaml +++ b/.github/workflows/apk-audio-tagging-wearos.yaml @@ -163,7 +163,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-audio-tagging.yaml b/.github/workflows/apk-audio-tagging.yaml index f6b85c3b2f..c11180c4ae 100644 --- a/.github/workflows/apk-audio-tagging.yaml +++ b/.github/workflows/apk-audio-tagging.yaml @@ -160,7 +160,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-kws.yaml b/.github/workflows/apk-kws.yaml index 524622de85..43cdef49e0 100644 --- a/.github/workflows/apk-kws.yaml +++ b/.github/workflows/apk-kws.yaml @@ -160,7 +160,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo 
"SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-speaker-diarization.yaml b/.github/workflows/apk-speaker-diarization.yaml new file mode 100644 index 0000000000..90bcc7323a --- /dev/null +++ b/.github/workflows/apk-speaker-diarization.yaml @@ -0,0 +1,179 @@ +name: apk-speaker-diarization + +on: + push: + branches: + - apk + + workflow_dispatch: + +concurrency: + group: apk-speaker-diarization-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_speaker_identification: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for speaker diarization ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo 
"BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + total=${{ matrix.total }} + index=${{ matrix.index }} + + python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index + + chmod +x build-apk-speaker-diarization.sh + mv -v ./build-apk-speaker-diarization.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-speaker-diarization.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + d=speaker-diarization/$SHERPA_ONNX_VERSION + mkdir -p $d/ + cp -v ../apks/*.apk $d/ + git status + git lfs track "*.apk" + git add . + git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk-speaker-identification.yaml b/.github/workflows/apk-speaker-identification.yaml index ca89ec49f6..c88718d6e0 100644 --- a/.github/workflows/apk-speaker-identification.yaml +++ b/.github/workflows/apk-speaker-identification.yaml @@ -53,6 +53,23 @@ jobs: run: | python3 -m pip install --upgrade pip jinja2 + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + - name: Generate build script shell: bash run: | @@ -82,6 +99,51 @@ jobs: ls -lh ./apks/ du -h -d1 . 
+ # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + - name: Publish to huggingface env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -101,7 +163,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-spoken-language-identification.yaml b/.github/workflows/apk-spoken-language-identification.yaml index 3cb9c83b28..cc7525cd42 100644 --- a/.github/workflows/apk-spoken-language-identification.yaml +++ b/.github/workflows/apk-spoken-language-identification.yaml @@ -163,7 +163,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone 
https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-tts-engine.yaml b/.github/workflows/apk-tts-engine.yaml index d251483e4a..b8614cb76c 100644 --- a/.github/workflows/apk-tts-engine.yaml +++ b/.github/workflows/apk-tts-engine.yaml @@ -164,7 +164,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-tts.yaml b/.github/workflows/apk-tts.yaml index dd0aa3f775..1609739c69 100644 --- a/.github/workflows/apk-tts.yaml +++ b/.github/workflows/apk-tts.yaml @@ -164,7 +164,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/apk-vad-asr.yaml b/.github/workflows/apk-vad-asr.yaml index 8310043a9c..fe706aa14d 100644 --- a/.github/workflows/apk-vad-asr.yaml +++ b/.github/workflows/apk-vad-asr.yaml @@ -23,8 +23,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - total: ["5"] - index: ["0", "1", "2", "3", "4"] + total: ["10"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] steps: - uses: actions/checkout@v4 @@ -163,8 +163,9 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone 
https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface + du -h -d1 . git fetch git pull git merge -m "merge remote" --ff origin main diff --git a/.github/workflows/apk-vad.yaml b/.github/workflows/apk-vad.yaml index 8253145b68..f1a4364fc0 100644 --- a/.github/workflows/apk-vad.yaml +++ b/.github/workflows/apk-vad.yaml @@ -160,13 +160,13 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface cd huggingface git fetch git pull git merge -m "merge remote" --ff origin main - d=vad/SHERPA_ONNX_VERSION + d=vad/$SHERPA_ONNX_VERSION mkdir -p $d cp -v ../apks/*.apk $d/ git status diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml index a56b2cdad4..63a5cf414a 100644 --- a/.github/workflows/arm-linux-gnueabihf.yaml +++ b/.github/workflows/arm-linux-gnueabihf.yaml @@ -7,7 +7,6 @@ on: - master paths: - '.github/workflows/arm-linux-gnueabihf.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/arm-linux-gnueabihf.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -62,7 +60,7 @@ jobs: if: steps.cache-qemu.outputs.cache-hit != 'true' run: | sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build + sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev. 
- name: checkout-qemu if: steps.cache-qemu.outputs.cache-hit != 'true' @@ -205,7 +203,7 @@ jobs: git config --global user.name "Fangjun Kuang" rm -rf huggingface - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface mkdir -p arm32 diff --git a/.github/workflows/build-wheels-aarch64-cuda.yaml b/.github/workflows/build-wheels-aarch64-cuda.yaml new file mode 100644 index 0000000000..a221553a4a --- /dev/null +++ b/.github/workflows/build-wheels-aarch64-cuda.yaml @@ -0,0 +1,118 @@ +name: build-wheels-aarch64-cuda + +on: + push: + branches: + - wheel + workflow_dispatch: + +env: + SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1 + +concurrency: + group: build-wheels-aarch64-cuda-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_wheels_aarch64_cuda: + name: ${{ matrix.manylinux }} ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-20.04] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] + manylinux: [manylinux2014] #, manylinux_2_28] + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + with: + platforms: all + + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + uses: pypa/cibuildwheel@v2.21.3 + env: + CIBW_BEFORE_ALL: | + git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib + cd alsa-lib + ./gitcompile + cd .. 
+ echo "PWD" + ls -lh /project/alsa-lib/src/.libs + + CIBW_ENVIRONMENT: CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR SHERPA_ONNX_MAKE_ARGS="VERBOSE=1" SHERPA_ONNX_ENABLE_ALSA=1 SHERPA_ONNX_ENABLE_GPU=ON + CIBW_BUILD: "${{ matrix.python-version}}-* " + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" + CIBW_BUILD_VERBOSITY: 3 + CIBW_ARCHS_LINUX: aarch64 + CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_aarch64 + # From onnxruntime >= 1.17.0, it drops support for CentOS 7.0 and it supports only manylinux_2_28. + # manylinux_2_24 is no longer supported + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/ + + - name: Install patchelf + shell: bash + run: | + sudo apt-get update -q + sudo apt-get install -q -y patchelf + patchelf --help + + - name: Patch wheels + shell: bash + run: | + mkdir ./wheels + sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels + + ls -lh ./wheels/ + rm -rf ./wheelhouse + mv ./wheels ./wheelhouse + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + d=cuda/$SHERPA_ONNX_VERSION + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p $d + + cp -v ../wheelhouse/*.whl 
$d/ + + git status + git add . + git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + + - uses: actions/upload-artifact@v4 + with: + name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }} + path: ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml index 9d4ac571e5..1ba8ebd682 100644 --- a/.github/workflows/build-wheels-aarch64.yaml +++ b/.github/workflows/build-wheels-aarch64.yaml @@ -20,8 +20,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + os: [ubuntu-20.04] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] manylinux: [manylinux2014] #, manylinux_2_28] steps: @@ -35,7 +35,7 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BEFORE_ALL: | git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib @@ -60,7 +60,6 @@ jobs: ls -lh ./wheelhouse/ - name: Install patchelf - if: matrix.os == 'ubuntu-latest' shell: bash run: | sudo apt-get update -q @@ -69,7 +68,6 @@ jobs: - name: Patch wheels shell: bash - if: matrix.os == 'ubuntu-latest' run: | mkdir ./wheels sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels @@ -99,7 +97,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -125,6 +123,6 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m pip install wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff 
--git a/.github/workflows/build-wheels-armv7l.yaml b/.github/workflows/build-wheels-armv7l.yaml index 05c3b196dc..58a7cc8973 100644 --- a/.github/workflows/build-wheels-armv7l.yaml +++ b/.github/workflows/build-wheels-armv7l.yaml @@ -102,7 +102,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -129,6 +129,6 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m pip install wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-linux-cuda.yaml b/.github/workflows/build-wheels-linux-cuda.yaml index b1ee898250..1801840abc 100644 --- a/.github/workflows/build-wheels-linux-cuda.yaml +++ b/.github/workflows/build-wheels-linux-cuda.yaml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -34,7 +34,7 @@ jobs: - name: Install Python dependencies shell: bash run: | - pip install -U pip wheel setuptools twine + pip install -U pip wheel setuptools twine==5.0.0 - name: Build alsa-lib shell: bash @@ -113,7 +113,7 @@ jobs: d=cuda/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml index e16f5bb9a7..0380e2a993 100644 --- a/.github/workflows/build-wheels-linux.yaml +++ b/.github/workflows/build-wheels-linux.yaml @@ -20,8 +20,8 @@ jobs: strategy: fail-fast: false matrix: - os: 
[ubuntu-latest] - python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + os: [ubuntu-20.04] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] manylinux: [manylinux2014] #, manylinux_2_28] @@ -31,7 +31,7 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BEFORE_ALL: | git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib @@ -96,7 +96,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -118,7 +118,7 @@ jobs: shell: bash run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m pip install wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-macos-arm64.yaml b/.github/workflows/build-wheels-macos-arm64.yaml index ce899c5d19..fe1d316281 100644 --- a/.github/workflows/build-wheels-macos-arm64.yaml +++ b/.github/workflows/build-wheels-macos-arm64.yaml @@ -21,13 +21,13 @@ jobs: fail-fast: false matrix: os: [macos-13] - python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"] + python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v4 - name: Build wheels - uses: pypa/cibuildwheel@v2.15.0 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'" @@ -68,7 +68,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -95,6 +95,6 @@ 
jobs: fi python3 -m pip install $opts --upgrade pip - python3 -m pip install $opts wheel twine setuptools + python3 -m pip install $opts wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-macos-universal2.yaml b/.github/workflows/build-wheels-macos-universal2.yaml index 4578d370e9..0f9dcedc78 100644 --- a/.github/workflows/build-wheels-macos-universal2.yaml +++ b/.github/workflows/build-wheels-macos-universal2.yaml @@ -21,13 +21,13 @@ jobs: fail-fast: false matrix: os: [macos-latest] - python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"] + python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v4 - name: Build wheels - uses: pypa/cibuildwheel@v2.15.0 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'" @@ -68,7 +68,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -89,6 +89,6 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python3 -m pip install --break-system-packages --upgrade pip - python3 -m pip install --break-system-packages wheel twine setuptools + python3 -m pip install --break-system-packages wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-macos-x64.yaml b/.github/workflows/build-wheels-macos-x64.yaml index b7bf6ff54b..cbb4792e93 100644 --- a/.github/workflows/build-wheels-macos-x64.yaml +++ b/.github/workflows/build-wheels-macos-x64.yaml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [macos-13] - python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: 
actions/checkout@v4 @@ -42,7 +42,7 @@ jobs: - name: Build wheels if: matrix.python-version != 'cp37' - uses: pypa/cibuildwheel@v2.15.0 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64'" @@ -83,7 +83,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -110,6 +110,6 @@ jobs: fi python3 -m pip install $opts --upgrade pip - python3 -m pip install $opts wheel twine setuptools + python3 -m pip install $opts wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-win32.yaml b/.github/workflows/build-wheels-win32.yaml index 2560847830..732a17d7b5 100644 --- a/.github/workflows/build-wheels-win32.yaml +++ b/.github/workflows/build-wheels-win32.yaml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v4 @@ -29,7 +29,7 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-A Win32" CIBW_BUILD: "${{ matrix.python-version}}-* " @@ -67,7 +67,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -88,6 +88,6 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m 
pip install wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-win64-cuda.yaml b/.github/workflows/build-wheels-win64-cuda.yaml index f0a17da8cc..27b4fb87eb 100644 --- a/.github/workflows/build-wheels-win64-cuda.yaml +++ b/.github/workflows/build-wheels-win64-cuda.yaml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [windows-2019] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -75,7 +75,7 @@ jobs: d=cuda/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/build-wheels-win64.yaml b/.github/workflows/build-wheels-win64.yaml index 14e3e2ac4d..f2cc7c157a 100644 --- a/.github/workflows/build-wheels-win64.yaml +++ b/.github/workflows/build-wheels-win64.yaml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: os: [windows-2019] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -73,7 +73,7 @@ jobs: d=cpu/$SHERPA_ONNX_VERSION - git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface cd huggingface git fetch git pull @@ -94,6 +94,6 @@ jobs: TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m pip install wheel twine==5.0.0 setuptools twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml index 2afd95cab9..8fcfafd43d 100644 --- a/.github/workflows/build-xcframework.yaml +++ 
b/.github/workflows/build-xcframework.yaml @@ -43,6 +43,13 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Build iOS shared + if: matrix.with_tts == 'ON' + shell: bash + run: | + export CMAKE_VERBOSE_MAKEFILE=ON + ./build-ios-shared.sh + - name: Build iOS if: matrix.with_tts == 'ON' shell: bash @@ -135,7 +142,7 @@ jobs: rm -rf huggingface export GIT_CLONE_PROTECTION_ACTIVE=false - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface diff --git a/.github/workflows/c-api-from-buffer.yaml b/.github/workflows/c-api-from-buffer.yaml index 4352cd7ce9..5d9bc11db0 100644 --- a/.github/workflows/c-api-from-buffer.yaml +++ b/.github/workflows/c-api-from-buffer.yaml @@ -8,7 +8,6 @@ on: - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/c-api-from-buffer.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/c-api-from-buffer.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -215,4 +213,4 @@ jobs: ./keywords-spotter-buffered-tokens-keywords-c-api - rm -rf sherpa-onnx-kws-zipformer-* \ No newline at end of file + rm -rf sherpa-onnx-kws-zipformer-* diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml index 589bda71f1..4a4108c989 100644 --- a/.github/workflows/c-api.yaml +++ b/.github/workflows/c-api.yaml @@ -4,11 +4,8 @@ on: push: branches: - master - tags: - - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/c-api.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +16,6 @@ on: - master paths: - '.github/workflows/c-api.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -83,6 +79,201 @@ jobs: otool -L ./install/lib/libsherpa-onnx-c-api.dylib fi + - name: Test kws 
(zh) + shell: bash + run: | + gcc -o kws-c-api ./c-api-examples/kws-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kws-c-api + + rm ./kws-c-api + rm -rf sherpa-onnx-kws-* + + - name: Test Kokoro TTS (zh+en) + shell: bash + run: | + gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 + tar xf kokoro-multi-lang-v1_0.tar.bz2 + rm kokoro-multi-lang-v1_0.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kokoro-tts-zh-en-c-api + + rm ./kokoro-tts-zh-en-c-api + rm -rf kokoro-zh-en-* + + - name: Test Kokoro TTS (en) + shell: bash + run: | + gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 + tar xf kokoro-en-v0_19.tar.bz2 + rm kokoro-en-v0_19.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kokoro-tts-en-c-api + + rm ./kokoro-tts-en-c-api + rm -rf kokoro-en-* + + - uses: actions/upload-artifact@v4 + with: + name: 
kokoro-tts-${{ matrix.os }} + path: ./generated-kokoro-*.wav + + - name: Test Matcha TTS (zh) + shell: bash + run: | + gcc -o matcha-tts-zh-c-api ./c-api-examples/matcha-tts-zh-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 + tar xvf matcha-icefall-zh-baker.tar.bz2 + rm matcha-icefall-zh-baker.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./matcha-tts-zh-c-api + + rm ./matcha-tts-zh-c-api + rm -rf matcha-icefall-* + rm hifigan_v2.onnx + + - name: Test Matcha TTS (en) + shell: bash + run: | + gcc -o matcha-tts-en-c-api ./c-api-examples/matcha-tts-en-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 + tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 + rm matcha-icefall-en_US-ljspeech.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./matcha-tts-en-c-api + + rm ./matcha-tts-en-c-api + rm -rf matcha-icefall-* + rm hifigan_v2.onnx + + - uses: actions/upload-artifact@v4 + with: + name: matcha-tts-${{ matrix.os }} + path: ./generated-matcha-*.wav + + - name: Test vad + Whisper tiny.en + shell: bash + run: | + gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + # Now download models + # + curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 + rm sherpa-onnx-whisper-tiny.en.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./vad-whisper-c-api + + rm -rf sherpa-onnx-* + rm -rf *.onnx + rm *.wav + + - name: Test vad + Moonshine + shell: bash + run: | + gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + # Now download models + # + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./vad-moonshine-c-api + + rm -rf sherpa-onnx-* + rm -rf *.onnx + rm *.wav + + - name: Test Moonshine + shell: bash + run: | + gcc -o moonshine-c-api ./c-api-examples/moonshine-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export 
DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./moonshine-c-api + + rm -rf sherpa-onnx-* + - name: Test ffmpeg if: matrix.os == 'macos-latest' shell: bash diff --git a/.github/workflows/checksum.yaml b/.github/workflows/checksum.yaml new file mode 100644 index 0000000000..e500209d60 --- /dev/null +++ b/.github/workflows/checksum.yaml @@ -0,0 +1,21 @@ +name: Create checksum + +on: + schedule: + - cron: "0 1 * * *" # Runs at 1:00 AM UTC daily + workflow_dispatch: + +jobs: + checksum: + if: github.repository_owner == 'k2-fsa' + runs-on: macos-latest + strategy: + matrix: + tag: [null, asr-models, tts-models, kws-models, speaker-recongition-models, audio-tagging-models, punctuation-models] + steps: + - name: Run checksum action + uses: thewh1teagle/checksum@v1 + with: + tag: ${{ matrix.tag }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/cxx-api.yaml b/.github/workflows/cxx-api.yaml new file mode 100644 index 0000000000..e5a99fb09b --- /dev/null +++ b/.github/workflows/cxx-api.yaml @@ -0,0 +1,332 @@ +name: cxx-api + +on: + push: + branches: + - master + - cxx-api-asr-non-streaming + paths: + - '.github/workflows/cxx-api.yaml' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'cxx-api-examples/**' + pull_request: + branches: + - master + paths: + - '.github/workflows/cxx-api.yaml' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'cxx-api-examples/**' + + workflow_dispatch: + +concurrency: + group: cxx-api-${{ github.ref }} + cancel-in-progress: true + +jobs: + cxx_api: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-cxx-api-shared + + - name: Build sherpa-onnx + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export 
PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + + cmake \ + -D CMAKE_BUILD_TYPE=Release \ + -D BUILD_SHARED_LIBS=ON \ + -D CMAKE_INSTALL_PREFIX=./install \ + -D SHERPA_ONNX_ENABLE_BINARY=OFF \ + .. + + make -j2 install + + ls -lh install/lib + ls -lh install/include + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./install/lib/libsherpa-onnx-c-api.so + ldd ./install/lib/libsherpa-onnx-cxx-api.so + echo "---" + readelf -d ./install/lib/libsherpa-onnx-c-api.so + readelf -d ./install/lib/libsherpa-onnx-cxx-api.so + fi + + if [[ ${{ matrix.os }} == macos-latest ]]; then + otool -L ./install/lib/libsherpa-onnx-c-api.dylib + otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib + fi + + - name: Test KWS (zh) + shell: bash + run: | + g++ -std=c++17 -o kws-cxx-api ./cxx-api-examples/kws-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kws-cxx-api + + rm kws-cxx-api + rm -rf sherpa-onnx-kws-* + + - name: Test Kokoro TTS (zh+en) + shell: bash + run: | + g++ -std=c++17 -o kokoro-tts-zh-en-cxx-api ./cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 + tar xf kokoro-multi-lang-v1_0.tar.bz2 + rm kokoro-multi-lang-v1_0.tar.bz2 + + export 
LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kokoro-tts-zh-en-cxx-api + + rm kokoro-tts-zh-en-cxx-api + rm -rf kokoro-* + + - name: Test Kokoro TTS (en) + shell: bash + run: | + g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 + tar xf kokoro-en-v0_19.tar.bz2 + rm kokoro-en-v0_19.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./kokoro-tts-en-cxx-api + + rm kokoro-tts-en-cxx-api + rm -rf kokoro-en-* + + - uses: actions/upload-artifact@v4 + with: + name: kokoro-tts-${{ matrix.os }} + path: ./generated-kokoro-*.wav + + - name: Test Matcha TTS (zh) + shell: bash + run: | + g++ -std=c++17 -o matcha-tts-zh-cxx-api ./cxx-api-examples/matcha-tts-zh-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 + tar xvf matcha-icefall-zh-baker.tar.bz2 + rm matcha-icefall-zh-baker.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./matcha-tts-zh-cxx-api + + rm -rf matcha-icefall-* + rm hifigan_v2.onnx + rm matcha-tts-zh-cxx-api + + - name: Test Matcha TTS (en) + shell: bash + run: | + g++ -std=c++17 -o matcha-tts-en-cxx-api ./cxx-api-examples/matcha-tts-en-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l 
sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 + tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 + rm matcha-icefall-en_US-ljspeech.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./matcha-tts-en-cxx-api + + rm matcha-tts-en-cxx-api + rm -rf matcha-icefall-* + rm hifigan_v2.onnx + + - uses: actions/upload-artifact@v4 + with: + name: matcha-tts-${{ matrix.os }} + path: ./generated-matcha-*.wav + + - name: Test Moonshine tiny + shell: bash + run: | + g++ -std=c++17 -o moonshine-cxx-api ./cxx-api-examples/moonshine-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./moonshine-cxx-api + + rm -rf sherpa-onnx-* + rm ./moonshine-cxx-api + + - name: Test whisper + shell: bash + run: | + g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh whisper-cxx-api + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./whisper-cxx-api + echo "----" + readelf -d ./whisper-cxx-api + fi + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 + tar xvf 
sherpa-onnx-whisper-tiny.en.tar.bz2 + rm sherpa-onnx-whisper-tiny.en.tar.bz2 + + ls -lh sherpa-onnx-whisper-tiny.en + echo "---" + ls -lh sherpa-onnx-whisper-tiny.en/test_wavs + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./whisper-cxx-api + + rm -rf sherpa-onnx-whisper-* + rm ./whisper-cxx-api + + - name: Test SenseVoice + shell: bash + run: | + g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh sense-voice-cxx-api + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./sense-voice-cxx-api + echo "----" + readelf -d ./sense-voice-cxx-api + fi + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + + ls -lh sherpa-onnx-sense-voice-* + echo "---" + ls -lh sherpa-onnx-sense-voice-*/test_wavs + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./sense-voice-cxx-api + + rm -rf sherpa-onnx-sense-voice-* + rm ./sense-voice-cxx-api + + - name: Test streaming zipformer + shell: bash + run: | + g++ -std=c++17 -o streaming-zipformer-cxx-api ./cxx-api-examples/streaming-zipformer-cxx-api.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh streaming-zipformer-cxx-api + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./streaming-zipformer-cxx-api + echo "----" + readelf -d ./streaming-zipformer-cxx-api + fi + + curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + + ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + echo "---" + ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./streaming-zipformer-cxx-api + + rm -rf sherpa-onnx-streaming-zipformer-* + rm ./streaming-zipformer-cxx-api diff --git a/.github/workflows/dot-net.yaml b/.github/workflows/dot-net.yaml index 36637a9e2c..899cb99956 100644 --- a/.github/workflows/dot-net.yaml +++ b/.github/workflows/dot-net.yaml @@ -90,7 +90,7 @@ jobs: export GIT_CLONE_PROTECTION_ACTIVE=false export GIT_LFS_SKIP_SMUDGE=1 - git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface cd huggingface git fetch @@ -125,9 +125,7 @@ jobs: - name: Setup .NET uses: actions/setup-dotnet@v4 with: - dotnet-version: | - 6.0.x - 7.0.x + dotnet-version: 8.0.x - name: Install Python dependencies shell: bash diff --git a/.github/workflows/export-3dspeaker-to-onnx.yaml b/.github/workflows/export-3dspeaker-to-onnx.yaml index 42c965c909..e62d42784b 100644 --- a/.github/workflows/export-3dspeaker-to-onnx.yaml +++ b/.github/workflows/export-3dspeaker-to-onnx.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-latest] + os: [ubuntu-latest] python-version: ["3.8"] steps: @@ -43,3 +43,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + 
with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-ced-to-onnx.yaml b/.github/workflows/export-ced-to-onnx.yaml index 70c4cc5fb5..2f714bb80b 100644 --- a/.github/workflows/export-ced-to-onnx.yaml +++ b/.github/workflows/export-ced-to-onnx.yaml @@ -66,7 +66,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/k2-fsa/$d huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d huggingface mv -v $d/* huggingface cd huggingface git lfs track "*.onnx" diff --git a/.github/workflows/export-kokoro.yaml b/.github/workflows/export-kokoro.yaml new file mode 100644 index 0000000000..e6aae1da62 --- /dev/null +++ b/.github/workflows/export-kokoro.yaml @@ -0,0 +1,226 @@ +name: export-kokoro-to-onnx + +on: + push: + branches: + - export-kokoro + + workflow_dispatch: + +concurrency: + group: export-kokoro-to-onnx-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-kokoro-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export kokoro ${{ matrix.version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + version: ["0.19", "1.0"] + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + 
python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + shell: bash + run: | + pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch + + - name: Run + shell: bash + run: | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 + tar xf espeak-ng-data.tar.bz2 + rm espeak-ng-data.tar.bz2 + cd scripts/kokoro + v=${{ matrix.version }} + if [[ $v = "0.19" ]]; then + ./run.sh + elif [[ $v == "1.0" ]]; then + cd v1.0 + ./run.sh + fi + + - name: Collect results ${{ matrix.version }} + if: matrix.version == '0.19' + shell: bash + run: | + src=scripts/kokoro + + d=kokoro-en-v0_19 + mkdir $d + cp -a LICENSE $d/LICENSE + cp -a espeak-ng-data $d/ + cp -v $src/kokoro-v0_19.onnx $d/model.onnx + cp -v $src/voices.bin $d/ + cp -v $src/tokens.txt $d/ + cp -v $src/README-new.md $d/README.md + ls -lh $d/ + tar cjfv $d.tar.bz2 $d + rm -rf $d + + ls -lh $d.tar.bz2 + + - name: Collect results ${{ matrix.version }} + if: matrix.version == '1.0' + shell: bash + run: | + curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2 + tar xvf dict.tar.bz2 + rm dict.tar.bz2 + + curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst + curl -SL -o number-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst + curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst + + src=scripts/kokoro/v1.0 + + d=kokoro-multi-lang-v1_0 + mkdir $d + cp -a LICENSE $d/LICENSE + cp -a espeak-ng-data $d/ + cp -v $src/kokoro.onnx $d/model.onnx + cp -v $src/voices.bin $d/ + cp -v $src/tokens.txt $d/ + cp -v $src/lexicon*.txt 
$d/ + cp -v $src/README.md $d/README.md + cp -av dict $d/ + cp -v ./*.fst $d/ + ls -lh $d/ + echo "---" + ls -lh $d/dict + + tar cjfv $d.tar.bz2 $d + rm -rf $d + + ls -lh $d.tar.bz2 + + - name: Publish to huggingface ${{ matrix.version }} + if: matrix.version == '0.19' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 huggingface + cd huggingface + rm -rf ./* + git fetch + git pull + + git lfs track "cmn_dict" + git lfs track "ru_dict" + git lfs track "*.wav" + + cp -a ../espeak-ng-data ./ + mkdir -p test_wavs + + cp -v ../scripts/kokoro/kokoro-v0_19.onnx ./model.onnx + + cp -v ../scripts/kokoro/kokoro-v0_19-*.wav ./test_wavs/ + + cp -v ../scripts/kokoro/tokens.txt . + cp -v ../scripts/kokoro/voices.bin . + cp -v ../scripts/kokoro/README-new.md ./README.md + cp -v ../LICENSE ./ + + git lfs track "*.onnx" + git add . 
+ + ls -lh + + git status + + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true + + - name: Publish to huggingface ${{ matrix.version }} + if: matrix.version == '1.0' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 huggingface + cd huggingface + rm -rf ./* + git fetch + git pull + + git lfs track "cmn_dict" + git lfs track "ru_dict" + git lfs track "*.wav" + git lfs track "lexicon*.txt" + + cp -a ../espeak-ng-data ./ + + cp -v ../scripts/kokoro/v1.0/kokoro.onnx ./model.onnx + + + cp -v ../scripts/kokoro/v1.0/tokens.txt . + cp -v ../scripts/kokoro/v1.0/voices.bin . + cp -v ../scripts/kokoro/v1.0/lexicon*.txt . + cp -v ../scripts/kokoro/v1.0/README.md ./README.md + cp -v ../LICENSE ./ + cp -av ../dict ./ + cp -v ../*.fst ./ + + git lfs track "*.onnx" + git add . 
+ + ls -lh + + git status + + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true + + - name: Release + if: github.repository_owner == 'csukuangfj' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: tts-models + + - name: Release + if: github.repository_owner == 'k2-fsa' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + tag: tts-models diff --git a/.github/workflows/export-libriheavy.yaml b/.github/workflows/export-libriheavy.yaml index cfe0a28d20..69c22ef243 100644 --- a/.github/workflows/export-libriheavy.yaml +++ b/.github/workflows/export-libriheavy.yaml @@ -56,7 +56,7 @@ jobs: src=sherpa-onnx-zipformer-en-libriheavy-20230926-$m echo "Process $src" - git clone https://huggingface.co/csukuangfj/$src huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface cd huggingface git fetch git pull @@ -100,7 +100,7 @@ jobs: src=sherpa-onnx-zipformer-en-libriheavy-20230830-$m-punct-case echo "Process $src" - git clone https://huggingface.co/csukuangfj/$src huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/export-melo-tts-to-onnx.yaml b/.github/workflows/export-melo-tts-to-onnx.yaml index 0dc9bfe9d7..d0715b95a3 100644 --- a/.github/workflows/export-melo-tts-to-onnx.yaml +++ b/.github/workflows/export-melo-tts-to-onnx.yaml @@ -40,7 +40,7 @@ jobs: name: test.wav path: scripts/melo-tts/test.wav - - name: Publish to huggingface + - name: Publish to huggingface (Chinese + English) env: HF_TOKEN: ${{ secrets.HF_TOKEN }} uses: nick-fields/retry@v3 @@ -56,19 +56,19 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git 
clone https://huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface cd huggingface git fetch git pull echo "pwd: $PWD" - ls -lh ../scripts/melo-tts + ls -lh ../scripts/melo-tts/zh_en rm -rf ./ - cp -v ../scripts/melo-tts/*.onnx . - cp -v ../scripts/melo-tts/lexicon.txt . - cp -v ../scripts/melo-tts/tokens.txt . - cp -v ../scripts/melo-tts/README.md . + cp -v ../scripts/melo-tts/zh_en/*.onnx . + cp -v ../scripts/melo-tts/zh_en/lexicon.txt . + cp -v ../scripts/melo-tts/zh_en/tokens.txt . + cp -v ../scripts/melo-tts/zh_en/README.md . curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE @@ -102,6 +102,60 @@ jobs: tar cjvf $dst.tar.bz2 $dst rm -rf $dst + - name: Publish to huggingface (English) + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en huggingface + cd huggingface + git fetch + git pull + echo "pwd: $PWD" + ls -lh ../scripts/melo-tts/en + + rm -rf ./ + + cp -v ../scripts/melo-tts/en/*.onnx . + cp -v ../scripts/melo-tts/en/lexicon.txt . + cp -v ../scripts/melo-tts/en/tokens.txt . + cp -v ../scripts/melo-tts/en/README.md . + + curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE + + git lfs track "*.onnx" + git add . + + ls -lh + + git status + + git diff + + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en main || true + + cd .. 
+ + rm -rf huggingface/.git* + dst=vits-melo-tts-en + + mv huggingface $dst + + tar cjvf $dst.tar.bz2 $dst + rm -rf $dst + - name: Release uses: svenstaro/upload-release-action@v2 with: diff --git a/.github/workflows/export-moonshine-to-onnx.yaml b/.github/workflows/export-moonshine-to-onnx.yaml new file mode 100644 index 0000000000..2e73c2e049 --- /dev/null +++ b/.github/workflows/export-moonshine-to-onnx.yaml @@ -0,0 +1,106 @@ +name: export-moonshine-to-onnx + +on: + workflow_dispatch: + +concurrency: + group: export-moonshine-to-onnx-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-moonshine-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export moonshine models to ONNX + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + shell: bash + run: | + pip install -q onnx onnxruntime librosa tokenizers soundfile + + - name: Run + shell: bash + run: | + pushd scripts/moonshine + ./run.sh + popd + + mv -v scripts/moonshine/*.tar.bz2 . 
+ mv -v scripts/moonshine/sherpa-onnx-* ./ + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: asr-models + + - name: Publish to huggingface (tiny) + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=sherpa-onnx-moonshine-tiny-en-int8 + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v $d/* ./huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main + rm -rf huggingface + + - name: Publish to huggingface (base) + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=sherpa-onnx-moonshine-base-en-int8 + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v $d/* ./huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . 
+ git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main + rm -rf huggingface diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml index 138c708ad7..bbabfb60cb 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml @@ -67,7 +67,7 @@ jobs: rm -rf huggingface export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/$m huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface cp -av $m/* huggingface cd huggingface git lfs track "*.onnx" diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml index 7a7b7fc4eb..4a7e2339ed 100644 --- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml +++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml @@ -67,7 +67,7 @@ jobs: rm -rf huggingface export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/$m huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface cp -av $m/* huggingface cd huggingface git lfs track "*.onnx" diff --git a/.github/workflows/export-nemo-giga-am-to-onnx.yaml b/.github/workflows/export-nemo-giga-am-to-onnx.yaml new file mode 100644 index 0000000000..1af754d0b4 --- /dev/null +++ b/.github/workflows/export-nemo-giga-am-to-onnx.yaml @@ -0,0 +1,116 @@ +name: export-nemo-giga-am-to-onnx + +on: + workflow_dispatch: + +concurrency: + group: export-nemo-giga-am-to-onnx-${{ 
github.ref }} + cancel-in-progress: true + +jobs: + export-nemo-am-giga-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export nemo GigaAM models to ONNX + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run CTC + shell: bash + run: | + pushd scripts/nemo/GigaAM + ./run-ctc.sh + popd + + d=sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24 + mkdir $d + mkdir $d/test_wavs + rm scripts/nemo/GigaAM/model.onnx + mv -v scripts/nemo/GigaAM/*.int8.onnx $d/ + cp -v scripts/nemo/GigaAM/*.md $d/ + mv -v scripts/nemo/GigaAM/*.pdf $d/ + mv -v scripts/nemo/GigaAM/tokens.txt $d/ + mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/ + mv -v scripts/nemo/GigaAM/run-ctc.sh $d/ + mv -v scripts/nemo/GigaAM/*-ctc.py $d/ + + ls -lh scripts/nemo/GigaAM/ + + ls -lh $d + + tar cjvf ${d}.tar.bz2 $d + + - name: Run Transducer + shell: bash + run: | + pushd scripts/nemo/GigaAM + ./run-rnnt.sh + popd + + d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24 + mkdir $d + mkdir $d/test_wavs + + mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/ + mv -v scripts/nemo/GigaAM/decoder.onnx $d/ + mv -v scripts/nemo/GigaAM/joiner.onnx $d/ + + cp -v scripts/nemo/GigaAM/*.md $d/ + mv -v scripts/nemo/GigaAM/*.pdf $d/ + mv -v scripts/nemo/GigaAM/tokens.txt $d/ + mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/ + mv -v scripts/nemo/GigaAM/run-rnnt.sh $d/ + mv -v scripts/nemo/GigaAM/*-rnnt.py $d/ + + ls -lh scripts/nemo/GigaAM/ + + ls -lh $d + + tar cjvf ${d}.tar.bz2 $d + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: asr-models + + - 
name: Publish to huggingface (Transducer) + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/ + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v $d/* ./huggingface + cd huggingface + git lfs track "*.onnx" + git lfs track "*.wav" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml index 180c3dc12a..5059664130 100644 --- a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml +++ b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest] + os: [macos-latest] python-version: ["3.10"] steps: @@ -43,3 +43,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . 
+ git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml index 300aca500c..53f8dac7d4 100644 --- a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml +++ b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml @@ -29,7 +29,7 @@ jobs: - name: Install pyannote shell: bash run: | - pip install pyannote.audio onnx onnxruntime + pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3 - name: Run shell: bash @@ -75,7 +75,7 @@ jobs: d=sherpa-onnx-pyannote-segmentation-3-0 export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/$d huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface cp -v $d/* ./huggingface cd huggingface git lfs track "*.onnx" diff --git a/.github/workflows/export-revai-segmentation-to-onnx.yaml b/.github/workflows/export-revai-segmentation-to-onnx.yaml new file mode 100644 index 0000000000..d82f7c4e09 --- /dev/null +++ b/.github/workflows/export-revai-segmentation-to-onnx.yaml @@ -0,0 +1,86 @@ +name: export-revai-segmentation-to-onnx + +on: + workflow_dispatch: + +concurrency: + group: export-revai-segmentation-to-onnx-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-revai-segmentation-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export revai segmentation models to ONNX + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install pyannote + shell: bash + run: | + pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3 + 
+ - name: Run + shell: bash + run: | + d=sherpa-onnx-reverb-diarization-v1 + src=$PWD/$d + mkdir -p $src + + pushd scripts/pyannote/segmentation + ./run-revai.sh + cp ./*.onnx $src/ + cp ./README.md $src/ + cp ./LICENSE $src/ + cp ./run-revai.sh $src/run.sh + cp ./*.py $src/ + + popd + ls -lh $d + tar cjfv $d.tar.bz2 $d + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: speaker-segmentation-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=sherpa-onnx-reverb-diarization-v1 + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + cp -v $d/* ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . 
+ git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-sense-voice-to-onnx.yaml b/.github/workflows/export-sense-voice-to-onnx.yaml index 41a9a31a64..1c3e917296 100644 --- a/.github/workflows/export-sense-voice-to-onnx.yaml +++ b/.github/workflows/export-sense-voice-to-onnx.yaml @@ -66,7 +66,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/export-telespeech-ctc.yaml b/.github/workflows/export-telespeech-ctc.yaml index 102c3884eb..4f66d7ca4b 100644 --- a/.github/workflows/export-telespeech-ctc.yaml +++ b/.github/workflows/export-telespeech-ctc.yaml @@ -60,7 +60,7 @@ jobs: export GIT_CLONE_PROTECTION_ACTIVE=false - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf cp -a $src/* hf/ cd hf git lfs track "*.pdf" @@ -84,7 +84,7 @@ jobs: export GIT_CLONE_PROTECTION_ACTIVE=false rm -rf hf - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf cp -a $src/* hf/ cd hf git lfs track "*.pdf" diff --git a/.github/workflows/export-wenet-to-onnx.yaml b/.github/workflows/export-wenet-to-onnx.yaml index 626f477e61..7ef3a54b64 100644 --- a/.github/workflows/export-wenet-to-onnx.yaml +++ b/.github/workflows/export-wenet-to-onnx.yaml @@ -49,7 +49,7 @@ jobs: export 
GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface cd huggingface git fetch git pull @@ -98,7 +98,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface cd huggingface git fetch git pull @@ -147,7 +147,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface cd huggingface git fetch git pull @@ -196,7 +196,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface cd huggingface git fetch git pull @@ -245,7 +245,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface cd huggingface git fetch git pull @@ -295,7 +295,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false - git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface cd huggingface git fetch git pull diff --git 
a/.github/workflows/export-wespeaker-to-onnx.yaml b/.github/workflows/export-wespeaker-to-onnx.yaml index fd167ab211..05694f693a 100644 --- a/.github/workflows/export-wespeaker-to-onnx.yaml +++ b/.github/workflows/export-wespeaker-to-onnx.yaml @@ -48,3 +48,28 @@ jobs: repo_name: k2-fsa/sherpa-onnx repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} tag: speaker-recongition-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + d=speaker-embedding-models + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + mv -v ./*.onnx ./huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index a50aa99d74..53aebdd3b6 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -145,7 +145,7 @@ jobs: export GIT_LFS_SKIP_SMUDGE=1 - git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface rm -rf huggingface/* diff --git a/.github/workflows/flutter-android.yaml b/.github/workflows/flutter-android.yaml index 9752a82c6c..c2b1d01db1 100644 --- a/.github/workflows/flutter-android.yaml +++ b/.github/workflows/flutter-android.yaml @@ -214,7 +214,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION 
$SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/flutter-linux.yaml b/.github/workflows/flutter-linux.yaml index b6b1fb9c84..f1fdd5ec71 100644 --- a/.github/workflows/flutter-linux.yaml +++ b/.github/workflows/flutter-linux.yaml @@ -261,7 +261,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/flutter-macos.yaml b/.github/workflows/flutter-macos.yaml index 7c8a38e4c9..e85ff1644f 100644 --- a/.github/workflows/flutter-macos.yaml +++ b/.github/workflows/flutter-macos.yaml @@ -101,7 +101,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull @@ -207,7 +207,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/flutter-windows-x64.yaml b/.github/workflows/flutter-windows-x64.yaml index f4d296b709..59f6a6af92 100644 --- 
a/.github/workflows/flutter-windows-x64.yaml +++ b/.github/workflows/flutter-windows-x64.yaml @@ -94,7 +94,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull @@ -192,7 +192,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface cd huggingface git fetch git pull diff --git a/.github/workflows/hap-vad-asr.yaml b/.github/workflows/hap-vad-asr.yaml new file mode 100644 index 0000000000..9e64a9ab16 --- /dev/null +++ b/.github/workflows/hap-vad-asr.yaml @@ -0,0 +1,173 @@ +name: hap-vad-asr + +on: + push: + branches: + - hap + - hap-ci + + workflow_dispatch: + +concurrency: + group: hap-vad-asr-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + hap_vad_asr: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: Haps for vad asr ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["10"] + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '17' # it requires jdk 17 to sign the hap + + - name: Show java version + shell: bash + run: | + which java + java --version + + -
name: cache-toolchain + id: cache-toolchain-ohos + uses: actions/cache@v4 + with: + path: command-line-tools + key: commandline-tools-linux-x64-5.0.5.200.zip + + - name: Download toolchain + if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true' + shell: bash + run: | + curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip + unzip commandline-tools-linux-x64-5.0.5.200.zip + rm commandline-tools-linux-x64-5.0.5.200.zip + + - name: Set environment variable + shell: bash + run: | + echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH" + which cmake + + cmake --version + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Generate build script + shell: bash + run: | + cd scripts/hap + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-vad-asr-hap-script.py --total $total --index $index + ls -lh + + chmod +x build-hap-vad-asr.sh + mv -v ./build-hap-vad-asr.sh ../.. 
+ + - name: Generate secrets + shell: bash + run: | + echo "${{ secrets.HAP_SHERPA_ONNX_CER }}" > /tmp/sherpa_onnx.cer + shasum -a 256 /tmp/sherpa_onnx.cer + ls -lh /tmp/sherpa_onnx.cer + + # macos + # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64 + # + # linux + # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64 + # + # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b + # + echo "${{ secrets.HAP_SHERPA_ONNX_PROFILE }}" | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b + echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" > ./sherpa_onnx_ohos_key.p12.base64 + echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12 + + ls -l /tmp/sherpa_onnx_profileRelease.p7b + ls -l /tmp/sherpa_onnx_ohos_key.p12 + + ls -lh ./sherpa_onnx_ohos_key.p12.base64 + shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64 + wc ./sherpa_onnx_ohos_key.p12.base64 + rm ./sherpa_onnx_ohos_key.p12.base64 + + shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b + shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12 + + - name: build HAP + env: + HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }} + HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }} + HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }} + shell: bash + run: | + export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools + ./build-hap-vad-asr.sh + + # remove secrets + rm /tmp/sherpa_onnx.cer + rm /tmp/sherpa_onnx_profileRelease.p7b + rm /tmp/sherpa_onnx_ohos_key.p12 + + - name: Display HAPs + shell: bash + run: | + ls -lh ./haps/ + du -h -d1 . 
+ + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface + cd huggingface + du -h -d1 . + git fetch + git pull + git merge -m "merge remote" --ff origin main + + d=hap/vad-asr/$SHERPA_ONNX_VERSION + mkdir -p $d + cp -v ../haps/*.hap $d/ + git status + git lfs track "*.hap" + git add . + git commit -m "add more HAPs" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main diff --git a/.github/workflows/har.yaml b/.github/workflows/har.yaml new file mode 100644 index 0000000000..7b5b2e5141 --- /dev/null +++ b/.github/workflows/har.yaml @@ -0,0 +1,214 @@ +name: har + +on: + push: + branches: + - master + # - ohos-har + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + + workflow_dispatch: + +concurrency: + group: har-${{ github.ref }} + cancel-in-progress: true + +jobs: + har: + name: Har + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: har-linux + + - name: cache-toolchain + id: cache-toolchain-ohos + uses: actions/cache@v4 + with: + path: command-line-tools + key: commandline-tools-linux-x64-5.0.5.200.zip + + - name: Download toolchain + if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true' + shell: bash + run: | + curl -SL -O 
https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip + unzip commandline-tools-linux-x64-5.0.5.200.zip + rm commandline-tools-linux-x64-5.0.5.200.zip + + - name: Set environment variable + shell: bash + run: | + echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH" + which cmake + + cmake --version + + ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake + + echo "====" + cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake + echo "====" + + # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH" + + ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/ + echo "--" + ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown* + + cat $GITHUB_PATH + + # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true + export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH + echo "path: $PATH" + + which aarch64-unknown-linux-ohos-clang++ || true + which aarch64-unknown-linux-ohos-clang || true + + aarch64-unknown-linux-ohos-clang++ --version || true + aarch64-unknown-linux-ohos-clang --version || true + + which armv7-unknown-linux-ohos-clang++ + which armv7-unknown-linux-ohos-clang + + armv7-unknown-linux-ohos-clang++ --version + armv7-unknown-linux-ohos-clang --version + + which x86_64-unknown-linux-ohos-clang++ + which x86_64-unknown-linux-ohos-clang + + x86_64-unknown-linux-ohos-clang++ --version + x86_64-unknown-linux-ohos-clang --version + + - name: Install tree + shell: bash + run: | + sudo apt-get update -q + sudo apt-get install -y -q tree + + - name: Build libraries + shell: bash + run: | + export 
CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native" + + ./build-ohos-arm64-v8a.sh + ./build-ohos-x86-64.sh + + - name: Build Har + shell: bash + run: | + export PATH="$GITHUB_WORKSPACE/command-line-tools/bin:$PATH" + + which hvigorw + + pushd harmony-os/SherpaOnnxHar + + cp -fv ../../LICENSE ./sherpa_onnx + cp -fv ../../CHANGELOG.md ./sherpa_onnx + + hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon + ls -lh ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har + cp -v ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har ../../ + + popd + + ls -lh *.har + + - name: View Har + shell: bash + run: | + file sherpa_onnx.har + tar xvf sherpa_onnx.har + + cd package + ls -lh + + ls -lh libs + echo "---libs/x86_64---" + ls -lh libs/x86_64 + + echo "---libs/arm64-v8a---" + ls -lh libs/arm64-v8a + + echo "---src/main/ets/components---" + ls -lh src/main/ets/components/ + + echo "---src/main/cpp/types/libsherpa_onnx/---" + ls -lh src/main/cpp/types/libsherpa_onnx/ + + tree . 
+ + - name: Collect result + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + mv sherpa_onnx.har sherpa_onnx-$SHERPA_ONNX_VERSION.har + + - uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-har + path: ./sherpa_onnx*.har + + - name: Release jar + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.har + # repo_name: k2-fsa/sherpa-onnx + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + # tag: v1.10.32 + + - name: Publish to huggingface + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + d=har + mkdir -p $d + cp -v ../*.har $d/ + git status + git lfs track "*.har" + git add . 
+ git commit -m "add more hars" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main diff --git a/.github/workflows/harmony-os.yaml b/.github/workflows/harmony-os.yaml new file mode 100644 index 0000000000..e1a2ae1a2e --- /dev/null +++ b/.github/workflows/harmony-os.yaml @@ -0,0 +1,159 @@ +name: harmony-os + +on: + push: + branches: + - master + - ohos + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + + workflow_dispatch: + +concurrency: + group: harmony-os-${{ github.ref }} + cancel-in-progress: true + +jobs: + harmony_os: + name: Harmony OS ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + arch: [arm64-v8a, armeabi-v7a, x86_64] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ohos-${{ matrix.arch }} + + - name: cache-toolchain + id: cache-toolchain-ohos + uses: actions/cache@v4 + with: + path: command-line-tools + key: commandline-tools-linux-x64-5.0.5.200.zip + + - name: Download toolchain + if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true' + shell: bash + run: | + curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip + unzip commandline-tools-linux-x64-5.0.5.200.zip + rm commandline-tools-linux-x64-5.0.5.200.zip + + - name: Set environment variable + shell: bash + run: | + echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH" + which cmake + + cmake --version + + ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake + + echo "====" + cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake + echo "====" + + # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH" + + ls -lh 
$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/ + echo "--" + ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown* + + cat $GITHUB_PATH + + # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true + export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH + echo "path: $PATH" + + which aarch64-unknown-linux-ohos-clang++ || true + which aarch64-unknown-linux-ohos-clang || true + + aarch64-unknown-linux-ohos-clang++ --version || true + aarch64-unknown-linux-ohos-clang --version || true + + which armv7-unknown-linux-ohos-clang++ + which armv7-unknown-linux-ohos-clang + + armv7-unknown-linux-ohos-clang++ --version + armv7-unknown-linux-ohos-clang --version + + which x86_64-unknown-linux-ohos-clang++ + which x86_64-unknown-linux-ohos-clang + + x86_64-unknown-linux-ohos-clang++ --version + x86_64-unknown-linux-ohos-clang --version + + - name: Build ${{ matrix.arch }} + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + arch=${{ matrix.arch }} + + echo "arch: $arch" + + export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native" + + if [[ $arch == arm64-v8a ]]; then + ./build-ohos-arm64-v8a.sh + elif [[ $arch == armeabi-v7a ]]; then + ./build-ohos-armeabi-v7a.sh + elif [[ $arch == x86_64 ]]; then + ./build-ohos-x86-64.sh + else + echo "Unknown arch $arch" + fi + + - name: Collect result for ${{ matrix.arch }} + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + arch=${{ matrix.arch }} + d=sherpa-onnx-$SHERPA_ONNX_VERSION-ohos-$arch + if [[ $arch == x86_64 ]]; then + cd ./build-ohos-x86-64 + else + 
cd ./build-ohos-$arch + fi + + mv install $d + tar cjfv $d.tar.bz2 $d + + ls -lh $d/lib + + + file $d/lib/* + + readelf -d $d/lib/libsherpa-onnx-c-api.so + + mv $d.tar.bz2 ../ + + - uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-ohos-${{ matrix.arch }} + path: ./*.tar.bz2 + + - name: Release jar + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.tar.bz2 + # repo_name: k2-fsa/sherpa-onnx + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + # tag: v1.10.23 diff --git a/.github/workflows/jni.yaml b/.github/workflows/jni.yaml index a0f7693937..3bce5cdcd6 100644 --- a/.github/workflows/jni.yaml +++ b/.github/workflows/jni.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/jni.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'kotlin-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -16,7 +15,6 @@ on: - master paths: - '.github/workflows/jni.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'kotlin-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -75,3 +73,8 @@ jobs: cd ./kotlin-api-examples ./run.sh + + - uses: actions/upload-artifact@v4 + with: + name: tts-files-${{ matrix.os }} + path: kotlin-api-examples/test-*.wav diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml index 11df536449..d28b7cba45 100644 --- a/.github/workflows/lazarus.yaml +++ b/.github/workflows/lazarus.yaml @@ -7,7 +7,6 @@ on: - lazarus paths: - '.github/workflows/lazarus.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'lazarus-examples/**' - 'sherpa-onnx/csrc/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/lazarus.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'lazarus-examples/**' - 'sherpa-onnx/csrc/*' @@ -43,7 +41,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-latest, macos-13, windows-latest] + os: [ubuntu-22.04, macos-latest, macos-13, 
windows-latest] steps: - uses: actions/checkout@v4 @@ -56,10 +54,10 @@ jobs: key: ${{ matrix.os }} # See https://github.com/gcarreno/setup-lazarus - - uses: gcarreno/setup-lazarus@v3 + - uses: gcarreno/setup-lazarus@v3.3.1 with: lazarus-version: "stable" - with-cache: true + with-cache: false - name: Lazarus info shell: bash @@ -79,14 +77,14 @@ jobs: uname -a - name: Install patchelf for ubuntu - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-22.04' shell: bash run: | sudo apt-get update -q sudo apt-get install -q -y patchelf - name: Show Patchelf version (ubuntu) - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-22.04' shell: bash run: | patchelf --version @@ -104,7 +102,7 @@ jobs: cd build os=${{ matrix.os }} - if [[ $os == 'windows-latest' || $os == 'ubuntu-20.04' ]]; then + if [[ $os == 'windows-latest' || $os == 'ubuntu-22.04' ]]; then BUILD_SHARED_LIBS=ON else BUILD_SHARED_LIBS=OFF @@ -139,7 +137,7 @@ jobs: lazbuild --verbose --build-mode=Release --widgetset=cocoa ./generate_subtitles.lpi elif [[ $os == macos-latest ]]; then lazbuild --verbose --build-mode=Release --widgetset=cocoa --cpu=aarch64 ./generate_subtitles.lpi - elif [[ $os == 'ubuntu-20.04' ]]; then + elif [[ $os == 'ubuntu-22.04' ]]; then lazbuild --verbose --build-mode=Release-Linux ./generate_subtitles.lpi else lazbuild --verbose --build-mode=Release ./generate_subtitles.lpi @@ -152,7 +150,7 @@ jobs: ls -lh - name: Collect generating subtitles (Ubuntu) - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-22.04' shell: bash run: | SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) @@ -223,7 +221,7 @@ jobs: ls -lh /tmp/macos-* - uses: actions/upload-artifact@v4 - if: matrix.os == 'ubuntu-20.04' + if: matrix.os == 'ubuntu-22.04' with: name: linux-x64 path: /tmp/linux-x64 @@ -355,8 +353,9 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo
"SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - git clone https://huggingface.co/csukuangfj/sherpa-onnx-bin huggingface + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin huggingface cd huggingface + git remote set-url origin https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin git fetch git pull git merge -m "merge remote" --ff origin main diff --git a/.github/workflows/linux-gpu.yaml b/.github/workflows/linux-gpu.yaml index 2a9d0529d7..c1a97aa730 100644 --- a/.github/workflows/linux-gpu.yaml +++ b/.github/workflows/linux-gpu.yaml @@ -14,7 +14,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -31,7 +30,6 @@ on: - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/linux-jni-aarch64.yaml b/.github/workflows/linux-jni-aarch64.yaml new file mode 100644 index 0000000000..19d1e09cf5 --- /dev/null +++ b/.github/workflows/linux-jni-aarch64.yaml @@ -0,0 +1,176 @@ +name: linux-jni-aarch64 + +on: + push: + branches: + - jni + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + workflow_dispatch: + +concurrency: + group: linux-jni-aarch64-${{ github.ref }} + cancel-in-progress: true + +jobs: + linux-jni-aarch64: + name: linux jni aarch64 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + # java-version: ['8', '11', '16', '17', '21'] + java-version: ['21'] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: ${{ matrix.java-version }} + + - name: Set up QEMU + if: steps.cache-build-result.outputs.cache-hit != 'true' + uses:
docker/setup-qemu-action@v2 + with: + platforms: all + + - name: Display PWD + shell: bash + run: | + echo "pwd: $PWD" + ls -lh + du -h -d1 . + + - name: Build sherpa-onnx + if: matrix.java-version == '21' + uses: addnab/docker-run-action@v3 + with: + image: quay.io/pypa/manylinux2014_aarch64 + options: | + --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx + shell: bash + run: | + uname -a + gcc --version + cmake --version + cat /etc/*release + id + pwd + + yum install -y java-11-openjdk-devel + java -version + which java + ls -lh $(which java) + ls -lrt /etc/alternatives/java + + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-2.el7_9.aarch64 + echo "JAVA_HOME: $JAVA_HOME" + find $JAVA_HOME -name jni.h + + cd /home/runner/work/sherpa-onnx/sherpa-onnx + + git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib + pushd alsa-lib + ./gitcompile + popd + + export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH + export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs + + mkdir build + cd build + + cmake \ + -D SHERPA_ONNX_ENABLE_TTS=ON \ + -D CMAKE_BUILD_TYPE=Release \ + -D BUILD_SHARED_LIBS=ON \ + -D CMAKE_INSTALL_PREFIX=./install \ + -D SHERPA_ONNX_ENABLE_BINARY=OFF \ + -D SHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j2 + make install + + ls -lh lib + rm -rf ./install/lib/pkgconfig + rm -rf ./install/lib/share + rm -rf ./install/lib/cargs.h + rm -rf ./install/include/cargs.h + rm -rf ./install/lib/libcargs.so + rm -rf ./install/lib/libsherpa-onnx-c-api.so + + echo "----" + ls -lh install/lib + + echo "----" + + - uses: actions/upload-artifact@v4 + if: matrix.java-version == '21' + with: + name: release-jni-linux-${{ matrix.java-version }} + path: build/install/* + + - name: Copy files + if: matrix.java-version == '21' + shell: bash + run: | + du -h -d1 . 
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-jni + mkdir $dst + + cp -a build/install/lib $dst/ + cp -a build/install/include $dst/ + + tree $dst + + tar cjvf ${dst}.tar.bz2 $dst + du -h -d1 . + + - name: Publish to huggingface + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && matrix.java-version == '21' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_CLONE_PROTECTION_ACTIVE=false + GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface + + cd huggingface + mkdir -p jni + + cp -v ../sherpa-onnx-*.tar.bz2 ./jni + cp -v ../*.jar ./jni + + git status + git lfs track "*.bz2" + + git add .
+ + git commit -m "add more files" + + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main + + - name: Release pre-compiled binaries and libs for linux aarch64 + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.java-version == '21' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: sherpa-onnx-*.tar.bz2 + diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 0e1eca0990..ea3bd2b4a5 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -18,7 +18,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -38,7 +40,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -143,14 +147,34 @@ jobs: name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: install/* - - name: Test offline transducer + - name: Test offline TTS + if: matrix.with_tts == 'ON' + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-tts + + .github/scripts/test-offline-tts.sh + du -h -d1 . 
+ + - uses: actions/upload-artifact@v4 + if: matrix.with_tts == 'ON' + with: + name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} + path: tts + + - name: Test offline Moonshine + if: matrix.build_type != 'Debug' shell: bash run: | du -h -d1 . export PATH=$PWD/build/bin:$PATH export EXE=sherpa-onnx-offline - .github/scripts/test-offline-transducer.sh + readelf -d build/bin/sherpa-onnx-offline + + .github/scripts/test-offline-moonshine.sh du -h -d1 . - name: Test offline CTC @@ -163,6 +187,37 @@ jobs: .github/scripts/test-offline-ctc.sh du -h -d1 . + - name: Test C++ API + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api + export CXX_WHISPER_EXE=whisper-cxx-api + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api + + .github/scripts/test-cxx-api.sh + du -h -d1 . + + - name: Test offline speaker diarization + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-speaker-diarization + + .github/scripts/test-speaker-diarization.sh + + - name: Test offline transducer + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-transducer.sh + du -h -d1 . + - name: Test online punctuation shell: bash run: | @@ -269,16 +324,7 @@ jobs: .github/scripts/test-offline-whisper.sh du -h -d1 . - - name: Test offline TTS - if: matrix.with_tts == 'ON' - shell: bash - run: | - du -h -d1 . - export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline-tts - .github/scripts/test-offline-tts.sh - du -h -d1 . 
- name: Test online paraformer shell: bash @@ -327,8 +373,4 @@ jobs: overwrite: true file: sherpa-onnx-*.tar.bz2 - - uses: actions/upload-artifact@v4 - with: - name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} - path: tts diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index 084531e4a6..813b8fd0eb 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -18,7 +18,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -37,7 +39,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -115,6 +119,45 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx + - name: Test offline TTS + if: matrix.with_tts == 'ON' + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-tts + + .github/scripts/test-offline-tts.sh + + - name: Test offline Moonshine + if: matrix.build_type != 'Debug' + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-moonshine.sh + + - name: Test C++ API + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api + export CXX_WHISPER_EXE=whisper-cxx-api + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api + + .github/scripts/test-cxx-api.sh + du -h -d1 . 
+ + - name: Test offline speaker diarization + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-speaker-diarization + + .github/scripts/test-speaker-diarization.sh + - name: Test offline transducer shell: bash run: | @@ -190,15 +233,6 @@ jobs: .github/scripts/test-kws.sh - - name: Test offline TTS - if: matrix.with_tts == 'ON' - shell: bash - run: | - export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline-tts - - .github/scripts/test-offline-tts.sh - - name: Test online paraformer shell: bash run: | @@ -216,8 +250,6 @@ jobs: .github/scripts/test-offline-whisper.sh - - - name: Test online transducer shell: bash run: | @@ -235,11 +267,12 @@ jobs: .github/scripts/test-online-transducer.sh - name: Copy files + if: matrix.build_type == 'Release' shell: bash run: | SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) - if [[ ${{ matrix.with_tts }} ]]; then + if [[ ${{ matrix.with_tts }} == ON ]]; then dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }} else dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts @@ -256,7 +289,7 @@ jobs: tar cjvf ${dst}.tar.bz2 $dst - name: Release pre-compiled binaries and libs for macOS - if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + if: matrix.build_type == 'Release' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') uses: svenstaro/upload-release-action@v2 with: file_glob: true diff --git a/.github/workflows/mfc.yaml b/.github/workflows/mfc.yaml index e501478a29..1315092c2d 100644 --- a/.github/workflows/mfc.yaml +++ b/.github/workflows/mfc.yaml @@ -8,7 +8,6 @@ on: - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/mfc.yaml' - - 'CMakeLists.txt' - 
'cmake/**' - 'mfc-examples/**' - 'sherpa-onnx/csrc/*' @@ -18,7 +17,6 @@ on: - master paths: - '.github/workflows/mfc.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'mfc-examples/**' - 'sherpa-onnx/csrc/*' diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml index 2ed2131847..aed04e284f 100644 --- a/.github/workflows/pascal.yaml +++ b/.github/workflows/pascal.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/pascal.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'pascal-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -17,7 +16,6 @@ on: - master paths: - '.github/workflows/pascal.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'pascal-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -127,6 +125,21 @@ jobs: cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts fi + - name: Run Pascal test (Speaker diarization) + shell: bash + run: | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH + + cd ./pascal-api-examples + pushd speaker-diarization + + ./run.sh + rm -rfv *.onnx *.wav sherpa-onnx-* + ls -lh + echo "---" + + popd + - name: Run Pascal test (TTS) shell: bash run: | @@ -137,6 +150,31 @@ jobs: ./run-piper.sh rm -rf vits-piper-* + rm piper + ls -lh + echo "---" + + ./run-kokoro-zh-en.sh + rm -rf kokoro-multi-* + rm kokoro-zh-en + ls -lh + echo "---" + + ./run-kokoro-en.sh + rm -rf kokoro-en-* + rm kokoro-en + ls -lh + echo "---" + + ./run-matcha-zh.sh + rm -rf matcha-icefall-* + rm matcha-zh + ls -lh + echo "---" + + ./run-matcha-en.sh + rm -rf matcha-icefall-* + rm matcha-en ls -lh echo "---" @@ -150,6 +188,10 @@ jobs: cd ./pascal-api-examples pushd vad-with-non-streaming-asr + time ./run-vad-with-moonshine.sh + rm -rf sherpa-onnx-* + echo "---" + time ./run-vad-with-whisper.sh rm -rf sherpa-onnx-* echo "---" @@ -205,6 +247,10 @@ jobs: rm -rf sherpa-onnx-* echo "---" + ./run-moonshine.sh + rm -rf sherpa-onnx-* + echo "---" + ./run-whisper.sh rm -rf sherpa-onnx-* echo "---" diff --git a/.github/workflows/pkg-config.yaml 
b/.github/workflows/pkg-config.yaml index 57ed8a21a7..48ef160ba1 100644 --- a/.github/workflows/pkg-config.yaml +++ b/.github/workflows/pkg-config.yaml @@ -10,7 +10,6 @@ on: paths: - '.github/workflows/pkg-config.yaml' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -21,7 +20,6 @@ on: paths: - '.github/workflows/pkg-config.yaml' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/release-dart-package.yaml b/.github/workflows/release-dart-package.yaml index f590403fea..cc830e2c2c 100644 --- a/.github/workflows/release-dart-package.yaml +++ b/.github/workflows/release-dart-package.yaml @@ -481,11 +481,8 @@ jobs: - name: Copy pre-built libs shell: bash run: | - echo "----ios-arm64----" - cp -v build-ios-shared/ios-arm64/libsherpa-onnx-c-api.dylib flutter/sherpa_onnx_ios/ios/ - cp -v build-ios-shared/ios-onnxruntime/onnxruntime.xcframework/ios-arm64/onnxruntime.a flutter/sherpa_onnx_ios/ios/libonnxruntime.a - - ls -lh flutter/sherpa_onnx_ios/ios/libonnxruntime.a + echo "----ios arm64 and arm64_x64_simulator----" + cp -av build-ios-shared/sherpa_onnx.xcframework flutter/sherpa_onnx_ios/ios/ mv -v flutter/sherpa_onnx_ios /tmp/to_be_published diff --git a/.github/workflows/riscv64-linux.yaml b/.github/workflows/riscv64-linux.yaml index e3e5e8b198..f81d5cb2ec 100644 --- a/.github/workflows/riscv64-linux.yaml +++ b/.github/workflows/riscv64-linux.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/riscv64-linux.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/riscv64-linux.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'toolchains/riscv64-linux-gnu.toolchain.cmake' diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 
3e932707cc..ed5901e798 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/run-java-test.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'java-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -17,7 +16,6 @@ on: - master paths: - '.github/workflows/run-java-test.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'java-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -107,6 +105,38 @@ jobs: make -j4 ls -lh lib + - name: Run java test (Non-Streaming ASR) + shell: bash + run: | + cd ./java-api-examples + + ./run-non-streaming-decode-file-moonshine.sh + rm -rf sherpa-onnx-moonshine-* + + ./run-non-streaming-decode-file-sense-voice.sh + rm -rf sherpa-onnx-sense-voice-* + + ./run-inverse-text-normalization-paraformer.sh + + ./run-non-streaming-decode-file-paraformer.sh + rm -rf sherpa-onnx-paraformer-zh-* + + ./run-non-streaming-decode-file-transducer.sh + rm -rf sherpa-onnx-zipformer-* + + ./run-non-streaming-decode-file-whisper.sh + rm -rf sherpa-onnx-whisper-* + + ./run-non-streaming-decode-file-nemo.sh + rm -rf sherpa-onnx-nemo-* + + - name: Run java test (speaker diarization) + shell: bash + run: | + cd ./java-api-examples + ./run-offline-speaker-diarization.sh + rm -rfv *.onnx *.wav sherpa-onnx-pyannote-* + - name: Run java test (kws) shell: bash run: | @@ -199,32 +229,23 @@ jobs: ./run-streaming-decode-file-transducer.sh rm -rf sherpa-onnx-streaming-* - - name: Run java test (Non-Streaming ASR) + - name: Run java test (Non-Streaming TTS) shell: bash run: | cd ./java-api-examples - ./run-non-streaming-decode-file-sense-voice.sh - rm -rf sherpa-onnx-sense-voice-* - - ./run-inverse-text-normalization-paraformer.sh - - ./run-non-streaming-decode-file-paraformer.sh - rm -rf sherpa-onnx-paraformer-zh-* - - ./run-non-streaming-decode-file-transducer.sh - rm -rf sherpa-onnx-zipformer-* + ./run-non-streaming-tts-kokoro-zh-en.sh + ./run-non-streaming-tts-kokoro-en.sh + ./run-non-streaming-tts-matcha-zh.sh 
+ ./run-non-streaming-tts-matcha-en.sh + ls -lh - ./run-non-streaming-decode-file-whisper.sh - rm -rf sherpa-onnx-whisper-* + rm -rf kokoro-multi-* + rm -rf kokoro-en-* - ./run-non-streaming-decode-file-nemo.sh - rm -rf sherpa-onnx-nemo-* + rm -rf matcha-icefall-* + rm hifigan_v2.onnx - - name: Run java test (Non-Streaming TTS) - shell: bash - run: | - cd ./java-api-examples ./run-non-streaming-tts-piper-en.sh rm -rf vits-piper-* diff --git a/.github/workflows/run-python-test-macos.yaml b/.github/workflows/run-python-test-macos.yaml index ed51379d2e..c9fafe68a3 100644 --- a/.github/workflows/run-python-test-macos.yaml +++ b/.github/workflows/run-python-test-macos.yaml @@ -7,7 +7,6 @@ on: paths: - '.github/workflows/run-python-test-macos.yaml' - '.github/scripts/test-python.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'python-api-examples/**' @@ -17,7 +16,6 @@ on: paths: - '.github/workflows/run-python-test-macos.yaml' - '.github/scripts/test-python.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'python-api-examples/**' @@ -54,6 +52,9 @@ jobs: - os: macos-latest python-version: "3.12" + - os: macos-latest + python-version: "3.13" + steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/run-python-test.yaml b/.github/workflows/run-python-test.yaml index 80fa86a746..7080420f94 100644 --- a/.github/workflows/run-python-test.yaml +++ b/.github/workflows/run-python-test.yaml @@ -7,7 +7,6 @@ on: paths: - '.github/workflows/run-python-test.yaml' - '.github/scripts/test-python.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'python-api-examples/**' @@ -17,7 +16,6 @@ on: paths: - '.github/workflows/run-python-test.yaml' - '.github/scripts/test-python.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'python-api-examples/**' @@ -53,6 +51,8 @@ jobs: python-version: "3.11" - os: ubuntu-22.04 python-version: "3.12" + - os: ubuntu-22.04 + python-version: "3.13" steps: - uses: actions/checkout@v4 diff 
--git a/.github/workflows/sanitizer.yaml b/.github/workflows/sanitizer.yaml index 7fce3834a1..7cda968990 100644 --- a/.github/workflows/sanitizer.yaml +++ b/.github/workflows/sanitizer.yaml @@ -76,6 +76,15 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx + - name: Test C++ API + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api + export CXX_WHISPER_EXE=whisper-cxx-api + + .github/scripts/test-cxx-api.sh + - name: Test online punctuation shell: bash run: | @@ -109,7 +118,6 @@ jobs: .github/scripts/test-online-ctc.sh - - name: Test C API shell: bash run: | diff --git a/.github/workflows/speaker-diarization.yaml b/.github/workflows/speaker-diarization.yaml index 0bd6a575ca..ab2a4f0904 100644 --- a/.github/workflows/speaker-diarization.yaml +++ b/.github/workflows/speaker-diarization.yaml @@ -67,7 +67,7 @@ jobs: curl -SL -O https://huggingface.co/csukuangfj/pyannote-models/resolve/main/segmentation-3.0/pytorch_model.bin test_wavs=( - 0-two-speakers-zh.wav + 0-four-speakers-zh.wav 1-two-speakers-en.wav 2-two-speakers-en.wav 3-two-speakers-en.wav diff --git a/.github/workflows/swift.yaml b/.github/workflows/swift.yaml index 3176c9b313..35bb7ab36d 100644 --- a/.github/workflows/swift.yaml +++ b/.github/workflows/swift.yaml @@ -4,10 +4,11 @@ on: push: branches: - master + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - './build-swift-macos.sh' - '.github/workflows/swift.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'swift-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -20,7 +21,6 @@ on: paths: - './build-swift-macos.sh' - '.github/workflows/swift.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'swift-api-examples/**' - 'sherpa-onnx/csrc/*' @@ -65,6 +65,30 @@ jobs: ./build-swift-macos.sh + - name: Copy files + if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + shell: 
bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static + mkdir $dst + + mv -v build-swift-macos/sherpa-onnx.xcframework $dst + + brew install tree + tree $dst + + tar cjvf ${dst}.tar.bz2 $dst + + - name: Release pre-compiled binaries and libs for macOS + if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: sherpa-onnx-*macos-xcframework-static.tar.bz2 + - name: test shell: bash run: | diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml index a9b2db5892..d9c863160a 100644 --- a/.github/workflows/test-build-wheel.yaml +++ b/.github/workflows/test-build-wheel.yaml @@ -7,7 +7,6 @@ on: paths: - 'setup.py' - '.github/workflows/test-build-wheel.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' @@ -17,7 +16,6 @@ on: paths: - 'setup.py' - '.github/workflows/test-build-wheel.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' @@ -139,7 +137,8 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH which sherpa-onnx sherpa-onnx --help diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml index 58d5054902..d9e27e86fe 100644 --- a/.github/workflows/test-dart.yaml +++ b/.github/workflows/test-dart.yaml @@ -114,6 +114,7 @@ jobs: cp 
scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml + cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml diff --git a/.github/workflows/test-dot-net-nuget.yaml b/.github/workflows/test-dot-net-nuget.yaml index d325824414..b89781be56 100644 --- a/.github/workflows/test-dot-net-nuget.yaml +++ b/.github/workflows/test-dot-net-nuget.yaml @@ -75,10 +75,10 @@ jobs: run: | df -h - - name: Setup .NET 6.0 + - name: Setup .NET 8.0 uses: actions/setup-dotnet@v4 with: - dotnet-version: 6.0.x + dotnet-version: 8.0.x - name: Check dotnet run: dotnet --info diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index 6e32b155ec..9b46b64d97 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-dot-net.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'dotnet-examples/**' @@ -17,7 +16,6 @@ on: - master paths: - '.github/workflows/test-dot-net.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'dotnet-examples/**' @@ -47,8 +45,57 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-dotnet-release-shared + + - name: Build sherpa-onnx + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + cmake \ + -DBUILD_SHARED_LIBS=ON \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + .. 
+ + cmake --build . --target install --config Release + + rm -rf install/share + rm -rf install/lib/pkg* + + ls -lh ./install/lib + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.os }} + path: ./build/install/lib/ + + test-dot-net: + runs-on: ${{ matrix.os }} + needs: [build-libs] + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8"] + + steps: + - name: Check space + shell: bash + run: | + df -h + - name: Free space - if: matrix.os == 'ubuntu-latest' shell: bash run: | df -h @@ -56,7 +103,6 @@ jobs: df -h - name: Free more space - if: matrix.os == 'ubuntu-latest' shell: bash run: | # https://github.com/orgs/community/discussions/25678 @@ -68,7 +114,6 @@ jobs: sudo rm -rf "$AGENT_TOOLSDIRECTORY" - name: Free Disk Space (Ubuntu) - if: matrix.os == 'ubuntu-latest' uses: jlumbroso/free-disk-space@main with: # this might remove tools that are actually needed, @@ -85,51 +130,10 @@ jobs: swap-storage: true - name: Check space - if: matrix.os == 'ubuntu-latest' shell: bash run: | df -h - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 - with: - key: ${{ matrix.os }}-release-shared - - - name: Build sherpa-onnx - shell: bash - run: | - export CMAKE_CXX_COMPILER_LAUNCHER=ccache - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - cmake --version - - mkdir build - cd build - cmake \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_INSTALL_PREFIX=./install \ - -DCMAKE_BUILD_TYPE=Release \ - -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ - -DBUILD_ESPEAK_NG_EXE=OFF \ - -DSHERPA_ONNX_ENABLE_BINARY=ON \ - .. - - cmake --build . 
--target install --config Release - - - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.os }} - path: ./build/install/lib/ - - test-dot-net: - runs-on: ${{ matrix.os }} - needs: [build-libs] - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ["3.8"] - - steps: - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -148,13 +152,12 @@ jobs: uses: actions/download-artifact@v4 with: name: ubuntu-latest - path: /tmp/linux + path: /tmp/linux-x64 - name: Setup .NET uses: actions/setup-dotnet@v4 with: - dotnet-version: | - 6.0.x + dotnet-version: 8.0.x - name: Check dotnet run: dotnet --info @@ -162,17 +165,21 @@ jobs: - name: Display files shell: bash run: | - echo "----------/tmp/----------" - ls -lh /tmp/ + echo "----------/tmp----------" + ls -lh /tmp - echo "----------/tmp/linux----------" - ls -lh /tmp/linux + echo "----------/tmp/linux-x64----------" + ls -lh /tmp/linux-x64 + df -h - name: Build shell: bash run: | cd scripts/dotnet ./run.sh + df -h + + ls -lh /tmp/packages - name: Copy files shell: bash @@ -181,9 +188,14 @@ jobs: ls -lh /tmp + df -h + - name: Run tests shell: bash run: | + dotnet nuget locals all --clear + df -h + .github/scripts/test-dot-net.sh - uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml index 2634e5ca75..f2e4cb1bc3 100644 --- a/.github/workflows/test-go-package.yaml +++ b/.github/workflows/test-go-package.yaml @@ -68,6 +68,64 @@ jobs: run: | gcc --version + - name: Test Keyword spotting + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/keyword-spotting-from-file/ + ./run.sh + + - name: Test adding punctuation + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/add-punctuation/ + ./run.sh + + - name: Test non-streaming speaker diarization + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/non-streaming-speaker-diarization/ + ./run.sh + + - 
name: Test non-streaming speaker diarization + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' + shell: bash + run: | + cd go-api-examples/non-streaming-speaker-diarization/ + go mod tidy + cat go.mod + go build + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . + + ./run.sh + + - name: Test non-streaming speaker diarization + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' + shell: bash + run: | + cd go-api-examples/non-streaming-speaker-diarization/ + + go env GOARCH + go env -w GOARCH=386 + go env -w CGO_ENABLED=1 + + go mod tidy + cat go.mod + go build + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . + + ./run.sh + - name: Test streaming HLG decoding (Linux/macOS) if: matrix.os != 'windows-latest' shell: bash @@ -151,6 +209,25 @@ jobs: go build ls -lh + echo "Test kokoro zh+en" + ./run-kokoro-zh-en.sh + rm -rf kokoro-multi-* + ls -lh + + echo "Test kokoro en" + ./run-kokoro-en.sh + rm -rf kokoro-en-* + ls -lh + + echo "Test matcha zh" + ./run-matcha-zh.sh + rm -rf matcha-icefall-* + + echo "Test matcha en" + ./run-matcha-en.sh + rm -rf matcha-icefall-* + ls -lh *.wav + echo "Test vits-ljs" ./run-vits-ljs.sh rm -rf vits-ljs @@ -188,6 +265,15 @@ jobs: cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . 
ls -lh + echo "Test matcha zh" + ./run-matcha-zh.sh + rm -rf matcha-icefall-* + + echo "Test matcha en" + ./run-matcha-en.sh + rm -rf matcha-icefall-* + ls -lh *.wav + echo "Test vits-ljs" ./run-vits-ljs.sh rm -rf vits-ljs @@ -233,6 +319,15 @@ jobs: cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . ls -lh + echo "Test matcha zh" + ./run-matcha-zh.sh + rm -rf matcha-icefall-* + + echo "Test matcha en" + ./run-matcha-en.sh + rm -rf matcha-icefall-* + ls -lh *.wav + echo "Test vits-ljs" ./run-vits-ljs.sh rm -rf vits-ljs diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 65c72e1741..8d68076d71 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-go.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'go-api-examples/**' @@ -16,7 +15,6 @@ on: - master paths: - '.github/workflows/test-go.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'go-api-examples/**' @@ -134,53 +132,15 @@ jobs: name: ${{ matrix.os }}-libs path: to-upload/ - - name: Test speaker identification + - name: Test Keyword spotting shell: bash run: | - cd scripts/go/_internal/speaker-identification/ - ./run.sh + cd scripts/go/_internal/keyword-spotting-from-file/ - - name: Test streaming HLG decoding - shell: bash - run: | - cd scripts/go/_internal/streaming-hlg-decoding/ ./run.sh - - name: Test non-streaming TTS - shell: bash - run: | - mkdir tts-waves - - cd scripts/go/_internal/non-streaming-tts/ - ls -lh - go mod tidy - cat go.mod - go build ls -lh - echo "Test vits-ljs" - ./run-vits-ljs.sh - rm -rf vits-ljs - - echo "Test vits-vctk" - ./run-vits-vctk.sh - rm -rf vits-vctk - - echo "Test vits-zh-aishell3" - ./run-vits-zh-aishell3.sh - rm -rf vits-icefall-zh-aishell3 - - echo "Test vits-piper-en_US-lessac-medium" - ./run-vits-piper-en_US-lessac-medium.sh - rm -rf vits-piper-en_US-lessac-medium 
- - cp *.wav ../../../../tts-waves/ - - - uses: actions/upload-artifact@v4 - with: - name: tts-waves-${{ matrix.os }} - path: tts-waves - - name: Test non-streaming decoding files shell: bash run: | @@ -191,6 +151,10 @@ jobs: go build ls -lh + echo "Test Moonshine" + ./run-moonshine.sh + rm -rf sherpa-onnx-* + echo "Test SenseVoice ctc" ./run-sense-voice-small.sh rm -rf sherpa-onnx-sense-* @@ -224,6 +188,84 @@ jobs: ./run-tdnn-yesno.sh rm -rf sherpa-onnx-tdnn-yesno + - name: Test adding punctuation + shell: bash + run: | + cd scripts/go/_internal/add-punctuation/ + ./run.sh + + - name: Test non-streaming speaker diarization + shell: bash + run: | + cd scripts/go/_internal/non-streaming-speaker-diarization/ + ./run.sh + + - name: Test speaker identification + shell: bash + run: | + cd scripts/go/_internal/speaker-identification/ + ./run.sh + + - name: Test streaming HLG decoding + shell: bash + run: | + cd scripts/go/_internal/streaming-hlg-decoding/ + ./run.sh + + - name: Test non-streaming TTS + shell: bash + run: | + mkdir tts-waves + + cd scripts/go/_internal/non-streaming-tts/ + ls -lh + go mod tidy + cat go.mod + go build + ls -lh + + echo "Test kokoro zh+en" + ./run-kokoro-zh-en.sh + rm -rf kokoro-multi-* + ls -lh + + echo "Test kokoro en" + ./run-kokoro-en.sh + rm -rf kokoro-en-* + ls -lh + + echo "Test matcha zh" + ./run-matcha-zh.sh + rm -rf matcha-icefall-* + + echo "Test matcha en" + ./run-matcha-en.sh + rm -rf matcha-icefall-* + ls -lh *.wav + + echo "Test vits-ljs" + ./run-vits-ljs.sh + rm -rf vits-ljs + + echo "Test vits-vctk" + ./run-vits-vctk.sh + rm -rf vits-vctk + + echo "Test vits-zh-aishell3" + ./run-vits-zh-aishell3.sh + rm -rf vits-icefall-zh-aishell3 + + echo "Test vits-piper-en_US-lessac-medium" + ./run-vits-piper-en_US-lessac-medium.sh + rm -rf vits-piper-en_US-lessac-medium + + cp *.wav ../../../../tts-waves/ + + - uses: actions/upload-artifact@v4 + with: + name: tts-waves-${{ matrix.os }} + path: tts-waves + - name: Test streaming 
decoding files shell: bash run: | diff --git a/.github/workflows/test-nodejs-addon-api.yaml b/.github/workflows/test-nodejs-addon-api.yaml index 224fc0f0b6..539025c8c5 100644 --- a/.github/workflows/test-nodejs-addon-api.yaml +++ b/.github/workflows/test-nodejs-addon-api.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-api.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -17,7 +16,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-api.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml index 07ab8d8781..232f8fe27c 100644 --- a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml +++ b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm-aarch64.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm-aarch64.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml index 98cba9dec1..0a21630dea 100644 --- a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml +++ b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm-win-x86.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -20,7 +19,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm-win-x86.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/test-nodejs-addon-npm.yaml b/.github/workflows/test-nodejs-addon-npm.yaml index 27a962357d..0e2b9f55fd 
100644 --- a/.github/workflows/test-nodejs-addon-npm.yaml +++ b/.github/workflows/test-nodejs-addon-npm.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -19,7 +18,6 @@ on: - master paths: - '.github/workflows/test-nodejs-addon-npm.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/test-nodejs-npm.yaml b/.github/workflows/test-nodejs-npm.yaml index cc49ac0c40..e1358fd8d2 100644 --- a/.github/workflows/test-nodejs-npm.yaml +++ b/.github/workflows/test-nodejs-npm.yaml @@ -26,7 +26,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.8"] + python-version: ["3.10"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml index 25f3c38fdc..78788ad047 100644 --- a/.github/workflows/test-nodejs.yaml +++ b/.github/workflows/test-nodejs.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-nodejs.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' @@ -18,7 +17,6 @@ on: - master paths: - '.github/workflows/test-nodejs.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/c-api/*' diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index 0f73e3643d..139e09a0e7 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -42,6 +42,8 @@ jobs: python-version: "3.11" - os: ubuntu-22.04 python-version: "3.12" + - os: ubuntu-22.04 + python-version: "3.13" - os: macos-12 python-version: "3.8" @@ -55,6 +57,8 @@ jobs: - os: macos-14 python-version: "3.12" + - os: macos-14 + python-version: "3.13" - os: windows-2019 python-version: "3.7" @@ -69,6 +73,8 @@ jobs: python-version: "3.11" - os: windows-2022 python-version: "3.12" + - 
os: windows-2022 + python-version: "3.13" steps: - uses: actions/checkout@v4 @@ -104,7 +110,8 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH sherpa-onnx --help sherpa-onnx-keyword-spotter --help diff --git a/.github/workflows/test-piper-phonemize.yaml b/.github/workflows/test-piper-phonemize.yaml index 1edbae6d2c..744095411d 100644 --- a/.github/workflows/test-piper-phonemize.yaml +++ b/.github/workflows/test-piper-phonemize.yaml @@ -5,7 +5,6 @@ on: - master paths: - '.github/workflows/test-piper-phonemize.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -13,7 +12,6 @@ on: - master paths: - '.github/workflows/test-piper-phonemize.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' diff --git a/.github/workflows/test-python-offline-websocket-server.yaml b/.github/workflows/test-python-offline-websocket-server.yaml index 52a22ee5ae..4fa98464c5 100644 --- a/.github/workflows/test-python-offline-websocket-server.yaml +++ b/.github/workflows/test-python-offline-websocket-server.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-python-offline-websocket-server.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' @@ -15,7 +14,6 @@ on: - master paths: - '.github/workflows/test-python-offline-websocket-server.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' diff --git a/.github/workflows/test-python-online-websocket-server.yaml b/.github/workflows/test-python-online-websocket-server.yaml index badf343a0a..d22e93002a 100644 --- 
a/.github/workflows/test-python-online-websocket-server.yaml +++ b/.github/workflows/test-python-online-websocket-server.yaml @@ -6,7 +6,6 @@ on: - master paths: - '.github/workflows/test-python-online-websocket-server.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' @@ -15,7 +14,6 @@ on: - master paths: - '.github/workflows/test-python-online-websocket-server.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'sherpa-onnx/python/**' diff --git a/.github/workflows/wasm-simd-hf-space-de-tts.yaml b/.github/workflows/wasm-simd-hf-space-de-tts.yaml index cbd3b1fce6..76013291b7 100644 --- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml +++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml index 510a003c7a..d34a182d41 100644 --- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml @@ -28,7 +28,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-en-tts.yaml b/.github/workflows/wasm-simd-hf-space-en-tts.yaml index 9c5c1d4469..d67ae88181 100644 --- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml +++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml index 
dc8bada704..81052cac84 100644 --- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml +++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml new file mode 100644 index 0000000000..14301f9f06 --- /dev/null +++ b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml @@ -0,0 +1,167 @@ +name: wasm-simd-hf-space-speaker-diarization + +on: + push: + branches: + - wasm + - wasm-speaker-diarization + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + + workflow_dispatch: + +concurrency: + group: wasm-simd-hf-space-speaker-diarization-${{ github.ref }} + cancel-in-progress: true + +jobs: + wasm-simd-hf-space-speaker-diarization: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install emsdk + uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.53 + actions-cache-folder: 'emsdk-cache' + + - name: View emsdk version + shell: bash + run: | + emcc -v + echo "--------------------" + emcc --check + + - name: Download model files + shell: bash + run: | + cd wasm/speaker-diarization/assets/ + ls -lh + echo "----------" + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx + rm -rf sherpa-onnx-pyannote-segmentation-3-0 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx 
+ mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx + + echo "----------" + + ls -lh + + - name: Build sherpa-onnx for WebAssembly + shell: bash + run: | + ./build-wasm-simd-speaker-diarization.sh + + - name: collect files + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speaker-diarization + mv build-wasm-simd-speaker-diarization/install/bin/wasm/speaker-diarization $dst + ls -lh $dst + tar cjfv $dst.tar.bz2 ./$dst + + - name: Upload wasm files + uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-wasm-simd-speaker-diarization + path: ./sherpa-onnx-wasm-simd-*.tar.bz2 + + - name: Release + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.tar.bz2 + + - name: Publish to ModelScope + # if: false + env: + MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf ms + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git ms + cd ms + rm -fv *.js + rm -fv *.data + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* . + + git status + git lfs track "*.data" + git lfs track "*.wasm" + ls -lh + + git add . 
+ git commit -m "update model" + git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx huggingface + ls -lh + + cd huggingface + rm -fv *.js + rm -fv *.data + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* . + + git status + git lfs track "*.data" + git lfs track "*.wasm" + ls -lh + + git add . 
+ git commit -m "update model" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx main diff --git a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml index c093f0fe99..18c1c1d607 100644 --- a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml +++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml @@ -37,7 +37,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml index c72e0cef29..02a328a9bd 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml index b76f912b47..1a72be6ab4 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml index 9bdd90ee24..8b7c2029f7 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml @@ -29,7 +29,7 @@ jobs: - name: Install emsdk 
uses: mymindstorm/setup-emsdk@v14 with: - version: 3.1.51 + version: 3.1.53 actions-cache-folder: 'emsdk-cache' - name: View emsdk version diff --git a/.github/workflows/windows-arm64.yaml b/.github/workflows/windows-arm64.yaml index a6d2a96da2..b6ab5bf7e9 100644 --- a/.github/workflows/windows-arm64.yaml +++ b/.github/workflows/windows-arm64.yaml @@ -8,7 +8,6 @@ on: - 'v[0-9]+.[0-9]+.[0-9]+*' paths: - '.github/workflows/windows-arm64.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -16,7 +15,6 @@ on: - master paths: - '.github/workflows/windows-arm64.yaml' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -34,7 +32,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - shared_lib: [ON] + shared_lib: [ON, OFF] with_tts: [ON, OFF] steps: diff --git a/.github/workflows/windows-x64-cuda.yaml b/.github/workflows/windows-x64-cuda.yaml index fd45704558..0d15af946b 100644 --- a/.github/workflows/windows-x64-cuda.yaml +++ b/.github/workflows/windows-x64-cuda.yaml @@ -14,7 +14,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -28,7 +27,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' diff --git a/.github/workflows/windows-x64-debug.yaml b/.github/workflows/windows-x64-debug.yaml index 09f93fd0d0..7abf022853 100644 --- a/.github/workflows/windows-x64-debug.yaml +++ b/.github/workflows/windows-x64-debug.yaml @@ -14,7 +14,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -28,7 +27,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - 
'.github/scripts/test-offline-tts.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml index 2d2811c31e..76dd426238 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -17,7 +17,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -34,7 +36,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -87,6 +91,32 @@ jobs: name: release-windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }} path: build/install/* + - name: Test offline Moonshine for windows x64 + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline.exe + + .github/scripts/test-offline-moonshine.sh + + - name: Test C++ API + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe + export CXX_WHISPER_EXE=whisper-cxx-api.exe + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe + + .github/scripts/test-cxx-api.sh + + - name: Test offline speaker diarization + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-speaker-diarization.exe + + .github/scripts/test-speaker-diarization.sh + - name: Test online punctuation shell: bash run: | diff --git a/.github/workflows/windows-x86-debug.yaml b/.github/workflows/windows-x86-debug.yaml index f72bf25664..59d9ef3707 
100644 --- a/.github/workflows/windows-x86-debug.yaml +++ b/.github/workflows/windows-x86-debug.yaml @@ -14,7 +14,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -28,7 +27,6 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' - - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index 316cef6265..f1498c0c0c 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -17,7 +17,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' pull_request: @@ -34,7 +36,9 @@ on: - '.github/scripts/test-audio-tagging.sh' - '.github/scripts/test-offline-punctuation.sh' - '.github/scripts/test-online-punctuation.sh' - - 'CMakeLists.txt' + - '.github/scripts/test-speaker-diarization.sh' + - '.github/scripts/test-c-api.sh' + - '.github/scripts/test-cxx-api.sh' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -87,6 +91,32 @@ jobs: name: release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }} path: build/install/* + - name: Test offline Moonshine for windows x86 + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline.exe + + .github/scripts/test-offline-moonshine.sh + + - name: Test C++ API + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe + export CXX_WHISPER_EXE=whisper-cxx-api.exe + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe + + 
.github/scripts/test-cxx-api.sh + + - name: Test offline speaker diarization + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-speaker-diarization.exe + + .github/scripts/test-speaker-diarization.sh + - name: Test online punctuation shell: bash run: | diff --git a/.gitignore b/.gitignore index b0fbfae781..ea356b0652 100644 --- a/.gitignore +++ b/.gitignore @@ -120,3 +120,16 @@ vits-melo-tts-zh_en sherpa-onnx-online-punct-en-2024-08-06 *.mp4 *.mp3 +sherpa-onnx-pyannote-segmentation-3-0 +sherpa-onnx-moonshine-tiny-en-int8 +sherpa-onnx-moonshine-base-en-int8 +harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE +harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md +matcha-icefall-zh-baker +matcha-icefall-en_US-ljspeech +kokoro-en-v0_19 +*.pt +lexicon.txt +us_gold.json +us_silver.json +kokoro-multi-lang-v1_0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 7af4a3f67b..4317d83976 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,233 @@ +## 1.10.42 + +* Fix publishing wheels (#1746) +* Update README to include https://github.com/xinhecuican/QSmartAssistant (#1755) +* Add Kokoro TTS to MFC examples (#1760) +* Refactor node-addon C++ code. (#1768) +* Add keyword spotter C API for HarmonyOS (#1769) +* Add ArkTS API for Keyword spotting. (#1775) +* Add Flutter example for Kokoro TTS (#1776) +* Initialize the audio session for iOS ASR example (#1786) +* Fix: Prepend 0 to tokenization to prevent word skipping for Kokoro. 
(#1787) +* Export Kokoro 1.0 to sherpa-onnx (#1788) +* Add C++ and Python API for Kokoro 1.0 multilingual TTS model (#1795) +* Add Java and Kotlin API for Kokoro TTS 1.0 (#1798) +* Add Android demo for Kokoro TTS 1.0 (#1799) +* Add C API for Kokoro TTS 1.0 (#1801) +* Add CXX API for Kokoro TTS 1.0 (#1802) +* Add Swift API for Kokoro TTS 1.0 (#1803) +* Add Go API for Kokoro TTS 1.0 (#1804) +* Add C# API for Kokoro TTS 1.0 (#1805) +* Add Dart API for Kokoro TTS 1.0 (#1806) +* Add Pascal API for Kokoro TTS 1.0 (#1807) +* Add JavaScript API (node-addon) for Kokoro TTS 1.0 (#1808) +* Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809) +* Add Flutter example for Kokoro TTS 1.0 (#1810) +* Add iOS demo for Kokoro TTS 1.0 (#1812) +* Add HarmonyOS demo for Kokoro TTS 1.0 (#1813) + +## 1.10.41 + +* Fix UI for Android TTS Engine. (#1735) +* Add iOS TTS example for MatchaTTS (#1736) +* Add iOS example for Kokoro TTS (#1737) +* Fix dither binding in Pybind11 to ensure independence from high_freq in FeatureExtractorConfig (#1739) +* Fix keyword spotting. (#1689) +* Update readme to include https://github.com/hfyydd/sherpa-onnx-server (#1741) +* Reduce vad-moonshine-c-api example code. (#1742) +* Support Kokoro TTS for HarmonyOS. (#1743) + +## 1.10.40 + +* Fix building wheels (#1703) +* Export kokoro to sherpa-onnx (#1713) +* Add C++ and Python API for Kokoro TTS models. (#1715) +* Add C API for Kokoro TTS models (#1717) +* Fix style issues (#1718) +* Add C# API for Kokoro TTS models (#1720) +* Add Swift API for Kokoro TTS models (#1721) +* Add Go API for Kokoro TTS models (#1722) +* Add Dart API for Kokoro TTS models (#1723) +* Add Pascal API for Kokoro TTS models (#1724) +* Add JavaScript API (node-addon) for Kokoro TTS models (#1725) +* Add JavaScript (WebAssembly) API for Kokoro TTS models. (#1726) +* Add Kotlin and Java API for Kokoro TTS models (#1728) +* Update README.md for KWS to not use git lfs. 
(#1729) + + + + +## 1.10.39 + +* Fix building without TTS (#1691) +* Add README for android libs. (#1693) +* Fix: export-onnx.py(expected all tensors to be on the same device) (#1699) +* Fix passing strings from C# to C. (#1701) + +## 1.10.38 + +* Fix initializing TTS in Python. (#1664) +* Remove spaces after punctuations for TTS (#1666) +* Add constructor fromPtr() for all flutter class with factory ctor. (#1667) +* Add Kotlin API for Matcha-TTS models. (#1668) +* Support Matcha-TTS models using espeak-ng (#1672) +* Add Java API for Matcha-TTS models. (#1673) +* Avoid adding tail padding for VAD in generate-subtitles.py (#1674) +* Add C API for MatchaTTS models (#1675) +* Add CXX API for MatchaTTS models (#1676) +* Add JavaScript API (node-addon-api) for MatchaTTS models. (#1677) +* Add HarmonyOS examples for MatchaTTS. (#1678) +* Upgraded to .NET 8 and made code style a little more internally consistent. (#1680) +* Update workflows to use .NET 8.0 also. (#1681) +* Add C# and JavaScript (wasm) API for MatchaTTS models (#1682) +* Add Android demo for MatchaTTS models. (#1683) +* Add Swift API for MatchaTTS models. (#1684) +* Add Go API for MatchaTTS models (#1685) +* Add Pascal API for MatchaTTS models. (#1686) +* Add Dart API for MatchaTTS models (#1687) + +## 1.10.37 + +* Add new tts models for Latvia and Persian+English (#1644) +* Add a byte-level BPE Chinese+English non-streaming zipformer model (#1645) +* Support removing invalid utf-8 sequences. 
(#1648) +* Add TeleSpeech CTC to non_streaming_server.py (#1649) +* Fix building macOS libs (#1656) +* Add Go API for Keyword spotting (#1662) +* Add Swift online punctuation (#1661) +* Add C++ runtime for Matcha-TTS (#1627) + +## 1.10.36 + +* Update AAR version in Android Java demo (#1618) +* Support linking onnxruntime statically for Android (#1619) +* Update readme to include Open-LLM-VTuber (#1622) +* Rename maxNumStences to maxNumSentences (#1625) +* Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX (Linux arm64 GPU). (#1630) +* Update readme to include jetson orin nx and nano b01 (#1631) +* feat: add checksum action (#1632) +* Support decoding with byte-level BPE (bbpe) models. (#1633) +* feat: enable c api for android ci (#1635) +* Update README.md (#1640) +* SherpaOnnxVadAsr: Offload runSecondPass to background thread for improved real-time audio processing (#1638) +* Fix GitHub actions. (#1642) + + +## 1.10.35 + +* Add missing changes about speaker identification demo for HarmonyOS (#1612) +* Provide sherpa-onnx.aar for Android (#1615) +* Use aar in Android Java demo. (#1616) + +## 1.10.34 + +* Fix building node-addon package (#1598) +* Update doc links for HarmonyOS (#1601) +* Add on-device real-time ASR demo for HarmonyOS (#1606) +* Add speaker identification APIs for HarmonyOS (#1607) +* Add speaker identification demo for HarmonyOS (#1608) +* Add speaker diarization API for HarmonyOS. (#1609) +* Add speaker diarization demo for HarmonyOS (#1610) + +## 1.10.33 + +* Add non-streaming ASR support for HarmonyOS. (#1564) +* Add streaming ASR support for HarmonyOS. 
(#1565) +* Fix building for Android (#1568) +* Publish `sherpa_onnx.har` for HarmonyOS (#1572) +* Add VAD+ASR demo for HarmonyOS (#1573) +* Fix publishing har packages for HarmonyOS (#1576) +* Add CI to build HAPs for HarmonyOS (#1578) +* Add microphone demo about VAD+ASR for HarmonyOS (#1581) +* Fix getting microphone permission for HarmonyOS VAD+ASR example (#1582) +* Add HarmonyOS support for text-to-speech. (#1584) +* Fix: support both old and new websockets request headers format (#1588) +* Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590) + +## 1.10.32 + +* Support cross-compiling for HarmonyOS (#1553) +* HarmonyOS support for VAD. (#1561) +* Fix publishing flutter iOS app to appstore (#1563). + +## 1.10.31 + +* Publish pre-built wheels for Python 3.13 (#1485) +* Publish pre-built macos xcframework (#1490) +* Fix reading tokens.txt on Windows. (#1497) +* Add two-pass ASR Android APKs for Moonshine models. (#1499) +* Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500) +* Publish pre-built wheels with CUDA support for Linux aarch64. (#1507) +* Export the English TTS model from MeloTTS (#1509) +* Add Lazarus example for Moonshine models. (#1532) +* Add isolate_tts demo (#1529) +* Add WebAssembly example for VAD + Moonshine models. (#1535) +* Add Android APK for streaming Paraformer ASR (#1538) +* Support static build for windows arm64. (#1539) +* Use xcframework for Flutter iOS plugin to support iOS simulators. + +## 1.10.30 + +* Fix building node-addon for Windows x86. (#1469) +* Begin to support https://github.com/usefulsensors/moonshine (#1470) +* Publish pre-built JNI libs for Linux aarch64 (#1472) +* Add C++ runtime and Python APIs for Moonshine models (#1473) +* Add Kotlin and Java API for Moonshine models (#1474) +* Add C and C++ API for Moonshine models (#1476) +* Add Swift API for Moonshine models. (#1477) +* Add Go API examples for adding punctuations to text. 
(#1478) +* Add Go API for Moonshine models (#1479) +* Add JavaScript API for Moonshine models (#1480) +* Add Dart API for Moonshine models. (#1481) +* Add Pascal API for Moonshine models (#1482) +* Add C# API for Moonshine models. (#1483) + +## 1.10.29 + +* Add Go API for offline punctuation models (#1434) +* Support https://huggingface.co/Revai/reverb-diarization-v1 (#1437) +* Add more models for speaker diarization (#1440) +* Add Java API example for hotwords. (#1442) +* Add java android demo (#1454) +* Add C++ API for streaming ASR. (#1455) +* Add C++ API for non-streaming ASR (#1456) +* Handle NaN embeddings in speaker diarization. (#1461) +* Add speaker identification with VAD and non-streaming ASR using ALSA (#1463) +* Support GigaAM CTC models for Russian ASR (#1464) +* Add GigaAM NeMo transducer model for Russian ASR (#1467) + +## 1.10.28 + +* Fix swift example for generating subtitles. (#1362) +* Allow more online models to load tokens file from the memory (#1352) +* Fix CI errors introduced by supporting loading keywords from buffers (#1366) +* Fix running MeloTTS models on GPU. (#1379) +* Support Parakeet models from NeMo (#1381) +* Export Pyannote speaker segmentation models to onnx (#1382) +* Support Agglomerative clustering. (#1384) +* Add Python API for clustering (#1385) +* support whisper turbo (#1390) +* context_state is not set correctly when previous context is passed after reset (#1393) +* Speaker diarization example with onnxruntime Python API (#1395) +* C++ API for speaker diarization (#1396) +* Python API for speaker diarization. 
(#1400) +* C API for speaker diarization (#1402) +* docs(nodejs-addon-examples): add guide for pnpm user (#1401) +* Go API for speaker diarization (#1403) +* Swift API for speaker diarization (#1404) +* Update readme to include more external projects using sherpa-onnx (#1405) +* C# API for speaker diarization (#1407) +* JavaScript API (node-addon) for speaker diarization (#1408) +* WebAssembly example for speaker diarization (#1411) +* Handle audio files less than 10s long for speaker diarization. (#1412) +* JavaScript API with WebAssembly for speaker diarization (#1414) +* Kotlin API for speaker diarization (#1415) +* Java API for speaker diarization (#1416) +* Dart API for speaker diarization (#1418) +* Pascal API for speaker diarization (#1420) +* Android JNI support for speaker diarization (#1421) +* Android demo for speaker diarization (#1423) + ## 1.10.27 * Add non-streaming ONNX models for Russian ASR (#1358) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9084a0216a..ef6d45b18f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,10 +8,9 @@ set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) project(sherpa-onnx) # Remember to update -# ./nodejs-addon-examples -# ./dart-api-examples/ # ./CHANGELOG.md -set(SHERPA_ONNX_VERSION "1.10.27") +# ./new-release.sh +set(SHERPA_ONNX_VERSION "1.10.42") # Disable warning about # @@ -32,6 +31,7 @@ option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" O option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) option(SHERPA_ONNX_ENABLE_DIRECTML "Enable ONNX Runtime DirectML support" OFF) option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) +option(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION "Whether to enable WASM for speaker diarization" OFF) option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) @@ -46,13 +46,18 @@ 
option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-i option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF) option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ON) +set(SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION "1.11.0" CACHE STRING "Used only for Linux ARM64 GPU. If you use Jetson nano b01, then please set it to 1.11.0. If you use Jetson Orin NX, then set it to 1.16.0") + + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") -set(CMAKE_SKIP_BUILD_RPATH FALSE) -set(BUILD_RPATH_USE_ORIGIN TRUE) -set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +if(NOT WIN32) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(BUILD_RPATH_USE_ORIGIN TRUE) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +endif() if(NOT APPLE) set(SHERPA_ONNX_RPATH_ORIGIN "$ORIGIN") @@ -80,11 +85,6 @@ if(SHERPA_ONNX_ENABLE_PYTHON AND NOT BUILD_SHARED_LIBS) set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) endif() -if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) - message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_JNI is ON") - set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) -endif() - if(SHERPA_ONNX_ENABLE_GPU) message(WARNING "\ Compiling for NVIDIA GPU is enabled. 
Please make sure cudatoolkit @@ -123,6 +123,11 @@ if(MSVC) ) endif() +if(CMAKE_SYSTEM_NAME STREQUAL OHOS) + set(CMAKE_CXX_FLAGS "-Wno-unused-command-line-argument ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-Wno-unused-command-line-argument ${CMAKE_C_FLAGS}") +endif() + message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") @@ -135,6 +140,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") +message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") @@ -149,7 +155,7 @@ message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ message(STATUS "SHERPA_ONNX_ENABLE_SANITIZER: ${SHERPA_ONNX_ENABLE_SANITIZER}") message(STATUS "SHERPA_ONNX_BUILD_C_API_EXAMPLES: ${SHERPA_ONNX_BUILD_C_API_EXAMPLES}") -if(BUILD_SHARED_LIBS) +if(BUILD_SHARED_LIBS OR SHERPA_ONNX_ENABLE_JNI) set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -196,9 +202,19 @@ else() add_definitions(-DSHERPA_ONNX_ENABLE_DIRECTML=0) endif() +if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION) + if(NOT SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION to ON if you want to build WASM for speaker diarization") + endif() + + if(NOT SHERPA_ONNX_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for 
speaker diarization") + endif() +endif() + if(SHERPA_ONNX_ENABLE_WASM_TTS) if(NOT SHERPA_ONNX_ENABLE_TTS) - message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build wasm TTS") + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build WASM for TTS") endif() if(NOT SHERPA_ONNX_ENABLE_WASM) @@ -250,7 +266,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") include(CheckIncludeFileCXX) -if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android) +if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android AND NOT CMAKE_SYSTEM_NAME STREQUAL OHOS) check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) if(SHERPA_ONNX_HAS_ALSA) message(STATUS "With Alsa") @@ -387,6 +403,7 @@ add_subdirectory(sherpa-onnx) if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY AND SHERPA_ONNX_BUILD_C_API_EXAMPLES) set(SHERPA_ONNX_PKG_WITH_CARGS "-lcargs") add_subdirectory(c-api-examples) + add_subdirectory(cxx-api-examples) endif() if(SHERPA_ONNX_ENABLE_WASM) diff --git a/README.md b/README.md index 890abe8827..b5fc25115e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ ### Supported functions -|Speech recognition| Speech synthesis | Speaker verification | Speaker identification | -|------------------|------------------|----------------------|------------------------| -| ✔️ | ✔️ | ✔️ | ✔️ | +|Speech recognition| Speech synthesis | +|------------------|------------------| +| ✔️ | ✔️ | + +|Speaker identification| Speaker diarization | Speaker verification | +|----------------------|-------------------- |------------------------| +| ✔️ | ✔️ | ✔️ | | Spoken Language identification | Audio tagging | Voice activity detection | |--------------------------------|---------------|--------------------------| @@ -14,14 +18,13 @@ ### Supported platforms -|Architecture| Android | iOS | Windows | macOS | linux | 
-|------------|---------|---------|------------|-------|-------| -| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | -| x86 | ✔️ | | ✔️ | | | -| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | -| arm32 | ✔️ | | | | ✔️ | -| riscv64 | | | | | ✔️ | - +|Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS | +|------------|---------|---------|------------|-------|-------|-----------| +| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | ✔️ | +| x86 | ✔️ | | ✔️ | | | | +| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | +| arm32 | ✔️ | | | | ✔️ | ✔️ | +| riscv64 | | | | | ✔️ | | ### Supported programming languages @@ -47,6 +50,7 @@ This repository supports running the following functions **locally** - Speech-to-text (i.e., ASR); both streaming and non-streaming are supported - Text-to-speech (i.e., TTS) + - Speaker diarization - Speaker identification - Speaker verification - Spoken language identification @@ -60,8 +64,11 @@ on the following platforms and operating systems: - Linux, macOS, Windows, openKylin - Android, WearOS - iOS + - HarmonyOS - NodeJS - WebAssembly + - [NVIDIA Jetson Orin NX][NVIDIA Jetson Orin NX] (Support running on both CPU and GPU) + - [NVIDIA Jetson Nano B01][NVIDIA Jetson Nano B01] (Support running on both CPU and GPU) - [Raspberry Pi][Raspberry Pi] - [RV1126][RV1126] - [LicheePi4A][LicheePi4A] @@ -79,17 +86,19 @@ with the following APIs ### Links for Huggingface Spaces -You can visit the following Huggingface spaces to try `sherpa-onnx` without -installing anything. All you need is a browser. - -| Description | URL | -|-------------------------------------------------------|------------------------------------| -| Speech recognition | [Click me][hf-space-asr] | -| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] | -| Speech synthesis | [Click me][hf-space-tts] | -| Generate subtitles | [Click me][hf-space-subtitle] | -| Audio tagging | [Click me][hf-space-audio-tagging] | -| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] | +
+You can visit the following Huggingface spaces to try sherpa-onnx without +installing anything. All you need is a browser. + +| Description | URL | +|-------------------------------------------------------|-----------------------------------------| +| Speaker diarization | [Click me][hf-space-speaker-diarization]| +| Speech recognition | [Click me][hf-space-asr] | +| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] | +| Speech synthesis | [Click me][hf-space-tts] | +| Generate subtitles | [Click me][hf-space-subtitle] | +| Audio tagging | [Click me][hf-space-audio-tagging] | +| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] | We also have spaces built using WebAssembly. They are listed below: @@ -102,6 +111,7 @@ We also have spaces built using WebAssembly. They are listed below: |Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]| |VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]| |VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]| +|VAD + speech recognition (English) with [Moonshine tiny][Moonshine tiny]|[Click me][wasm-hf-vad-asr-en-moonshine-tiny-en]| [地址][wasm-ms-vad-asr-en-moonshine-tiny-en]| |VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech] |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]| |VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech] |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]| |VAD + speech recognition (Japanese) with Zipformer trained with 
[ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]| @@ -111,24 +121,36 @@ We also have spaces built using WebAssembly. They are listed below: |VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]| |Speech synthesis (English) |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]| |Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]| +|Speaker diarization |[Click me][wasm-hf-speaker-diarization]|[地址][wasm-ms-speaker-diarization]| + +
### Links for pre-built Android APKs -| Description | URL | 中国用户 | -|----------------------------------------|------------------------------|-----------------------------| -| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]| -| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] | -| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] | -| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] | -| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] | -| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] | -| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] | -| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] | -| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] | -| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] | +
+ +You can find pre-built Android APKs for this repository in the following table + +| Description | URL | 中国用户 | +|----------------------------------------|------------------------------------|-----------------------------------| +| Speaker diarization | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]| +| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn] | +| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] | +| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] | +| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] | +| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] | +| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] | +| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] | +| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] | +| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] | +| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] | + +
### Links for pre-built Flutter APPs +
+ #### Real-time speech recognition | Description | URL | 中国用户 | @@ -147,17 +169,24 @@ We also have spaces built using WebAssembly. They are listed below: > Note: You need to build from source for iOS. +
+ ### Links for pre-built Lazarus APPs +
+ #### Generating subtitles | Description | URL | 中国用户 | |--------------------------------|----------------------------|----------------------------| | Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]| +
### Links for pre-trained models +
+ | Description | URL | |---------------------------------------------|---------------------------------------------------------------------------------------| | Speech recognition (speech to text, ASR) | [Address][asr-models] | @@ -168,6 +197,64 @@ We also have spaces built using WebAssembly. They are listed below: | Speaker identification (Speaker ID) | [Address][sid-models] | | Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from [Speech recognition][asr-models]| | Punctuation | [Address][punct-models] | +| Speaker segmentation | [Address][speaker-segmentation-models] | + +
+ +#### Some pre-trained ASR models (Streaming) + +
+ +Please see + + - + - + - + +for more models. The following table lists only **SOME** of them. + + +|Name | Supported Languages| Description| +|-----|-----|----| +|[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20][sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english)| +|[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16][sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)| +|[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23][sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]|Chinese| Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23)| +|[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17][sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]|English|Suitable for Cortex A7 CPU. 
See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-en-20m-2023-02-17)| +|[sherpa-onnx-streaming-zipformer-korean-2024-06-16][sherpa-onnx-streaming-zipformer-korean-2024-06-16]|Korean| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)| +|[sherpa-onnx-streaming-zipformer-fr-2023-04-14][sherpa-onnx-streaming-zipformer-fr-2023-04-14]|French| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)| + +
+ + +#### Some pre-trained ASR models (Non-Streaming) + +
+ +Please see + + - + - + - + - + - + +for more models. The following table lists only **SOME** of them. + +|Name | Supported Languages| Description| +|-----|-----|----| +|[Whisper tiny.en](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2)|English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html)| +|[Moonshine tiny][Moonshine tiny]|English|See [also](https://github.com/usefulsensors/moonshine)| +|[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17][sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]|Chinese, Cantonese, English, Korean, Japanese| 支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html)| +|[sherpa-onnx-paraformer-zh-2024-03-09][sherpa-onnx-paraformer-zh-2024-03-09]|Chinese, English| 也支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2024-03-09-chinese-english)| +|[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01][sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]|Japanese|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-japanese)| +|[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24][sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24-russian)| +|[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24][sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]|Russian| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/russian.html#sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24)| +|[sherpa-onnx-zipformer-ru-2024-09-18][sherpa-onnx-zipformer-ru-2024-09-18]|Russian|See 
[also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ru-2024-09-18-russian)| +|[sherpa-onnx-zipformer-korean-2024-06-24][sherpa-onnx-zipformer-korean-2024-06-24]|Korean|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-korean-2024-06-24-korean)| +|[sherpa-onnx-zipformer-thai-2024-06-20][sherpa-onnx-zipformer-thai-2024-06-20]|Thai| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-thai-2024-06-20-thai)| +|[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04][sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]|Chinese| 支持多种方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html#sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04)| + +
### Useful links @@ -182,6 +269,13 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**. ## Projects using sherpa-onnx +### [Open-LLM-VTuber](https://github.com/t41372/Open-LLM-VTuber) + +Talk to any LLM with hands-free voice interaction, voice interruption, and Live2D talking +face running locally across platforms + +See also + ### [voiceapi](https://github.com/ruzhila/voiceapi)
@@ -191,10 +285,30 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**. It shows how to use the ASR and TTS Python APIs with FastAPI.
-### [TMSpeech](https://github.com/jxlpzqc/TMSpeech) +### [腾讯会议摸鱼工具 TMSpeech](https://github.com/jxlpzqc/TMSpeech) Uses streaming ASR in C# with graphical user interface. +Video demo in Chinese: [【开源】Windows实时字幕软件(网课/开会必备)](https://www.bilibili.com/video/BV1rX4y1p7Nx) + +### [lol互动助手](https://github.com/l1veIn/lol-wom-electron) + +It uses the JavaScript API of sherpa-onnx along with [Electron](https://electronjs.org/) + +Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率的英雄联盟工具!英雄联盟的最后一块拼图!和游戏中的每个人无障碍沟通!](https://www.bilibili.com/video/BV142tje9E74) + +### [Sherpa-ONNX 语音识别服务器](https://github.com/hfyydd/sherpa-onnx-server) + +A server based on Node.js that provides a RESTful API for speech recognition. + +### [QSmartAssistant](https://github.com/xinhecuican/QSmartAssistant) + +一个模块化,全过程可离线,低占用率的对话机器人/智能音箱 + +It uses Qt. Both [ASR](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#asr) +and [TTS](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#tts) +are used. + [sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs [silero-vad]: https://github.com/snakers4/silero-vad @@ -204,6 +318,7 @@ Uses streaming ASR in C# with graphical user interface. [VisionFive 2]: https://www.starfivetech.com/en/site/boards [旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html [爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html +[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization [hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition [Whisper]: https://github.com/openai/whisper [hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper @@ -227,6 +342,8 @@ Uses streaming ASR in C# with graphical user interface. 
[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice [wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny [wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny +[wasm-hf-vad-asr-en-moonshine-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny +[wasm-ms-vad-asr-en-moonshine-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny [wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech [wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech [wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech @@ -248,6 +365,10 @@ Uses streaming ASR in C# with graphical user interface. 
[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en [wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de [wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de +[wasm-hf-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx +[wasm-ms-speaker-diarization]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx +[apk-speaker-diarization]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk.html +[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html [apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html [apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html [apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html @@ -290,5 +411,24 @@ Uses streaming ASR in C# with graphical user interface. 
[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models [slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models [punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models +[speaker-segmentation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models [GigaSpeech]: https://github.com/SpeechColab/GigaSpeech [WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech +[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2 +[sherpa-onnx-streaming-zipformer-korean-2024-06-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-korean-2024-06-16.tar.bz2 +[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2 +[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 +[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2 +[sherpa-onnx-zipformer-ru-2024-09-18]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ru-2024-09-18.tar.bz2 +[sherpa-onnx-zipformer-korean-2024-06-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-korean-2024-06-24.tar.bz2 +[sherpa-onnx-zipformer-thai-2024-06-20]: 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-thai-2024-06-20.tar.bz2 +[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24.tar.bz2 +[sherpa-onnx-paraformer-zh-2024-03-09]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2 +[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 +[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 +[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +[sherpa-onnx-streaming-zipformer-fr-2023-04-14]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-fr-2023-04-14.tar.bz2 +[Moonshine tiny]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 +[NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9 +[NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/ diff --git a/android/README.md b/android/README.md index 42b29e08f3..bae3355987 100644 --- a/android/README.md +++ b/android/README.md @@ -4,6 +4,8 @@ Please refer to 
https://k2-fsa.github.io/sherpa/onnx/android/index.html for usage. +- [SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It is for speaker diarization. + - [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model. - [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model diff --git a/android/SherpaOnnxAar/.gitignore b/android/SherpaOnnxAar/.gitignore new file mode 100644 index 0000000000..aa724b7707 --- /dev/null +++ b/android/SherpaOnnxAar/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxAar/README.md b/android/SherpaOnnxAar/README.md new file mode 100644 index 0000000000..3238153c2a --- /dev/null +++ b/android/SherpaOnnxAar/README.md @@ -0,0 +1,20 @@ +# Usage of this project + +``` +git clone https://github.com/k2-fsa/sherpa-onnx +cd sherpa-onnx + +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.10.42/sherpa-onnx-v1.10.42-android.tar.bz2 +tar xvf sherpa-onnx-v1.10.42-android.tar.bz2 + +cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/ +cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/ +cp -v jniLibs/x86/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/ +cp -v jniLibs/x86_64/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/ + +cd android/SherpaOnnxAar + +./gradlew :sherpa_onnx:assembleRelease +ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar +cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.10.42.aar +``` diff --git a/android/SherpaOnnxAar/build.gradle.kts b/android/SherpaOnnxAar/build.gradle.kts new file mode 100644 index 0000000000..e3f8a07411 --- /dev/null +++ b/android/SherpaOnnxAar/build.gradle.kts @@ -0,0 +1,6 @@ +// Top-level build 
file where you can add configuration options common to all sub-projects/modules. +plugins { + alias(libs.plugins.android.application) apply false + alias(libs.plugins.jetbrains.kotlin.android) apply false + alias(libs.plugins.android.library) apply false +} \ No newline at end of file diff --git a/android/SherpaOnnxAar/gradle.properties b/android/SherpaOnnxAar/gradle.properties new file mode 100644 index 0000000000..20e2a01520 --- /dev/null +++ b/android/SherpaOnnxAar/gradle.properties @@ -0,0 +1,23 @@ +# Project-wide Gradle settings. +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. 
For more details, visit +# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects +# org.gradle.parallel=true +# AndroidX package structure to make it clearer which packages are bundled with the +# Android operating system, and which are packaged with your app's APK +# https://developer.android.com/topic/libraries/support-library/androidx-rn +android.useAndroidX=true +# Kotlin code style for this project: "official" or "obsolete": +kotlin.code.style=official +# Enables namespacing of each library's R class so that its R class includes only the +# resources declared in the library itself and none from the library's dependencies, +# thereby reducing the size of the R class for that library +android.nonTransitiveRClass=true \ No newline at end of file diff --git a/android/SherpaOnnxAar/gradle/libs.versions.toml b/android/SherpaOnnxAar/gradle/libs.versions.toml new file mode 100644 index 0000000000..56172d2933 --- /dev/null +++ b/android/SherpaOnnxAar/gradle/libs.versions.toml @@ -0,0 +1,23 @@ +[versions] +agp = "8.4.0" +kotlin = "1.7.20" +coreKtx = "1.15.0" +junit = "4.13.2" +junitVersion = "1.2.1" +espressoCore = "3.6.1" +appcompat = "1.7.0" +material = "1.12.0" + +[libraries] +androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" } +junit = { group = "junit", name = "junit", version.ref = "junit" } +androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" } +androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" } +androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" } +material = { group = "com.google.android.material", name = "material", version.ref = "material" } + +[plugins] +android-application = { id = "com.android.application", version.ref = "agp" } +jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" } +android-library = { id 
= "com.android.library", version.ref = "agp" } + diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000..e708b1c023 Binary files /dev/null and b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar differ diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..a8b3563581 --- /dev/null +++ b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Thu Dec 12 14:02:30 CST 2024 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/android/SherpaOnnxAar/gradlew b/android/SherpaOnnxAar/gradlew new file mode 100755 index 0000000000..4f906e0c81 --- /dev/null +++ b/android/SherpaOnnxAar/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set 
-- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/android/SherpaOnnxAar/gradlew.bat b/android/SherpaOnnxAar/gradlew.bat new file mode 100644 index 0000000000..ac1b06f938 --- /dev/null +++ b/android/SherpaOnnxAar/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." 
in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/android/SherpaOnnxAar/settings.gradle.kts b/android/SherpaOnnxAar/settings.gradle.kts new file mode 100644 index 0000000000..53ee52b547 --- /dev/null +++ b/android/SherpaOnnxAar/settings.gradle.kts @@ -0,0 +1,23 @@ +pluginManagement { + repositories { + google { + content { + includeGroupByRegex("com\\.android.*") + includeGroupByRegex("com\\.google.*") + includeGroupByRegex("androidx.*") + } + } + mavenCentral() + gradlePluginPortal() + } +} +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + } +} + +rootProject.name = "SherpaOnnxAar" +include(":sherpa_onnx") diff --git a/android/SherpaOnnxAar/sherpa_onnx/.gitignore b/android/SherpaOnnxAar/sherpa_onnx/.gitignore new file mode 100644 index 0000000000..42afabfd2a --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts new file mode 100644 index 0000000000..4803cb8378 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts @@ -0,0 +1,43 @@ +plugins { + alias(libs.plugins.android.library) + alias(libs.plugins.jetbrains.kotlin.android) +} + +android { + namespace = "com.k2fsa.sherpa.onnx" + compileSdk = 34 + + defaultConfig { + minSdk = 21 + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + consumerProguardFiles("consumer-rules.pro") + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = "1.8" + } +} + +dependencies { + + 
implementation(libs.androidx.core.ktx) + implementation(libs.androidx.appcompat) + implementation(libs.material) + testImplementation(libs.junit) + androidTestImplementation(libs.androidx.junit) + androidTestImplementation(libs.androidx.espresso.core) +} \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro new file mode 100644 index 0000000000..481bb43481 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt new file mode 100644 index 0000000000..db1fbefc31 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. + val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx.test", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..a5918e68ab --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt new file mode 120000 index 0000000000..25c36e3965 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt \ No newline at end of file diff --git 
a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt new file mode 120000 index 0000000000..952fae878a --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt new file mode 120000 index 0000000000..4392376a1f --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt new file mode 120000 index 0000000000..1eed71678d --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflinePunctuation.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt new file mode 120000 index 0000000000..faa3ab4acf --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt 
b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt new file mode 120000 index 0000000000..d850dd7fdc --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt new file mode 120000 index 0000000000..2a3aff864e --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt new file mode 120000 index 0000000000..5bb19ee10e --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt new file mode 120000 index 0000000000..d4518b89bf --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt new file mode 120000 index 
0000000000..66441dea73 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt new file mode 120000 index 0000000000..754102447c --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt new file mode 120000 index 0000000000..de79a7d20a --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt new file mode 120000 index 0000000000..f1392e7712 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt new file mode 120000 index 0000000000..761b158ce9 --- /dev/null +++ 
b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt new file mode 120000 index 0000000000..05c8fb2463 --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt \ No newline at end of file diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt new file mode 100644 index 0000000000..05dfcd635f --- /dev/null +++ b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt @@ -0,0 +1,17 @@ +package com.k2fsa.sherpa.onnx + +import org.junit.Test + +import org.junit.Assert.* + +/** + * Example local unit test, which will execute on the development machine (host). 
+ * + * See [testing documentation](http://d.android.com/tools/testing). + */ +class ExampleUnitTest { + @Test + fun addition_isCorrect() { + assertEquals(4, 2 + 2) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/.gitignore b/android/SherpaOnnxJavaDemo/.gitignore new file mode 100644 index 0000000000..aa724b7707 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxJavaDemo/README.md b/android/SherpaOnnxJavaDemo/README.md new file mode 100644 index 0000000000..8d7b84dbd5 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/README.md @@ -0,0 +1,44 @@ +# Introduction + +Please run the following commands to download model files before you run this Android demo: + +```bash +# Assume we are inside +# /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo + +cd app/src/main/assets/ +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ./ +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ./ +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ./ +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./ + +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/* + +mv encoder-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ +mv decoder-epoch-99-avg-1.onnx 
sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ +mv joiner-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ +mv tokens.txt sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ +``` + +You should have the following directory structure: +``` +(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd +/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo/app/src/main/assets + +(py38) fangjuns-MacBook-Pro:assets fangjun$ tree . +. +└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + ├── decoder-epoch-99-avg-1.onnx + ├── encoder-epoch-99-avg-1.int8.onnx + ├── joiner-epoch-99-avg-1.int8.onnx + └── tokens.txt + +1 directory, 4 files +``` + +Remember to remove unused files to reduce the file size of the final APK. diff --git a/android/SherpaOnnxJavaDemo/app/.gitignore b/android/SherpaOnnxJavaDemo/app/.gitignore new file mode 100644 index 0000000000..42afabfd2a --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/build.gradle b/android/SherpaOnnxJavaDemo/app/build.gradle new file mode 100644 index 0000000000..d8ccc7a608 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/build.gradle @@ -0,0 +1,38 @@ +plugins { + id 'com.android.application' +} + +android { + compileSdk 34 + + defaultConfig { + applicationId "com.k2fsa.sherpa.onnx" + minSdk 28 + targetSdk 34 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +dependencies { + implementation 'androidx.appcompat:appcompat:1.3.1' + implementation 'com.google.android.material:material:1.3.0' + implementation 
'androidx.constraintlayout:constraintlayout:1.1.3' + implementation 'pub.devrel:easypermissions:3.0.0' + implementation 'androidx.core:core-ktx:1.7.0' + // implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar') + implementation 'com.github.k2-fsa:sherpa-onnx:v1.10.42' +} diff --git a/android/SherpaOnnxJavaDemo/app/proguard-rules.pro b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro new file mode 100644 index 0000000000..481bb43481 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..947820249a --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep b/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java new file mode 100644 index 0000000000..bd5f8a86f1 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java @@ -0,0 +1,18 @@ +package com.k2fsa.sherpa.onnx; + +import androidx.lifecycle.LiveData; +import androidx.lifecycle.MutableLiveData; +import androidx.lifecycle.ViewModel; + +public class AppViewModel extends ViewModel { + private final MutableLiveData speechRecognitionResult = new MutableLiveData<>(); + + public LiveData getSpeechRecognitionResult() { + return speechRecognitionResult; + } + + public void setSpeechRecognitionResult(String result) { + speechRecognitionResult.postValue(result); + } + +} diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java new file mode 100644 index 0000000000..f8acf35cb9 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java @@ -0,0 +1,39 @@ +package com.k2fsa.sherpa.onnx; + +import androidx.annotation.NonNull; +import androidx.lifecycle.ViewModelProvider; +import androidx.lifecycle.ViewModelStore; +import androidx.lifecycle.ViewModelStoreOwner; + + 
+public class Application extends android.app.Application implements ViewModelStoreOwner { + public static Application sApplication; + + + private AppViewModel viewModel; + private ViewModelStore viewModelStore; + + public static Application getInstance() { + return sApplication; + } + + @Override + public void onCreate() { + super.onCreate(); + sApplication = this; + viewModelStore = new ViewModelStore(); + viewModel = new ViewModelProvider(this).get(AppViewModel.class); + } + + @NonNull + @Override + public ViewModelStore getViewModelStore() { + return viewModelStore; + } + + public AppViewModel getViewModel() { + return viewModel; + } + + +} diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java new file mode 100644 index 0000000000..c465f6c30b --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java @@ -0,0 +1,52 @@ +package com.k2fsa.sherpa.onnx; + +import androidx.appcompat.app.AppCompatActivity; +import androidx.core.content.ContextCompat; +import androidx.lifecycle.ViewModelProvider; + +import android.Manifest; +import android.content.Intent; +import android.os.Bundle; +import android.util.Log; +import android.widget.TextView; + +import com.k2fsa.sherpa.onnx.service.SpeechSherpaRecognitionService; + +import pub.devrel.easypermissions.EasyPermissions; + +public class MainActivity extends AppCompatActivity { + private AppViewModel appViewModel; + private TextView tvText; + private static final int RC_AUDIO_PERM = 123; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + tvText = findViewById(R.id.text); + requestMicrophonePermission(); + } + + + private void startSpeechService() { + Intent serviceIntent = new Intent(this, SpeechSherpaRecognitionService.class); + 
ContextCompat.startForegroundService(this, serviceIntent); + appViewModel = new ViewModelProvider(Application.getInstance()).get(AppViewModel.class); + appViewModel.getSpeechRecognitionResult().observe(this, this::handleSpeechRecognitionResult); + } + + private void handleSpeechRecognitionResult(String result) { + tvText.setText(result); + } + + private void requestMicrophonePermission() { + String[] perms = {Manifest.permission.RECORD_AUDIO}; + if (EasyPermissions.hasPermissions(this, perms)) { + startSpeechService(); + } else { + EasyPermissions.requestPermissions(MainActivity.this, + "We need access to your microphone for voice recognition", + RC_AUDIO_PERM, perms); + } + } +} \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java new file mode 100644 index 0000000000..02ad4a15d2 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java @@ -0,0 +1,261 @@ +package com.k2fsa.sherpa.onnx.service; + +import android.Manifest; +import android.annotation.SuppressLint; +import android.app.Notification; +import android.app.NotificationChannel; +import android.app.NotificationManager; +import android.app.Service; +import android.content.Intent; +import android.content.pm.PackageManager; +import android.content.res.AssetManager; +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.media.MediaRecorder; +import android.os.Build; +import android.os.IBinder; +import android.text.TextUtils; +import android.util.Log; + +import androidx.core.app.ActivityCompat; +import androidx.core.app.NotificationCompat; + + +import com.k2fsa.sherpa.onnx.AppViewModel; +import com.k2fsa.sherpa.onnx.Application; + +import com.k2fsa.sherpa.onnx.OnlineModelConfig; +import 
com.k2fsa.sherpa.onnx.OnlineRecognizer; + +import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig; +import com.k2fsa.sherpa.onnx.OnlineStream; +import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig; +import com.k2fsa.sherpa.onnx.R; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import java.util.Objects; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + + +public class SpeechSherpaRecognitionService extends Service { + + private AppViewModel appViewModel; + private OnlineRecognizer recognizer; + private final int sampleRateInHz = 16000; + + private Thread recordingThread; + private boolean isRecording = false; + private int audioSource = MediaRecorder.AudioSource.MIC; + private int channelConfig = AudioFormat.CHANNEL_IN_MONO; + private int audioFormat = AudioFormat.ENCODING_PCM_16BIT; + private AudioRecord audioRecord; + private int idx = 0; + private String lastText = ""; + private ExecutorService executor; + + @Override + public void onCreate() { + super.onCreate(); + startForegroundService(); + // 获取 ViewModel + appViewModel = Application.getInstance().getViewModel(); + int numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); + + if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { + // TODO: Consider calling + // ActivityCompat#requestPermissions + // here to request the missing permissions, and then overriding + // public void onRequestPermissionsResult(int requestCode, String[] permissions, + // int[] grantResults) + // to handle the case where the user grants the permission. See the documentation + // for ActivityCompat#requestPermissions for more details. 
+ return; + } + audioRecord = new AudioRecord( + audioSource, + sampleRateInHz, + channelConfig, + audioFormat, + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM + ); + executor = Executors.newSingleThreadExecutor(); + executor.execute(this::initializeSherpa); + } + + + private void initializeSherpa() { + Log.d("Current Directory", System.getProperty("user.dir")); + String modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"; + initializeSherpaDir(modelDir, modelDir); + OnlineTransducerModelConfig onlineTransducerModelConfig = new OnlineTransducerModelConfig(); + onlineTransducerModelConfig.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx"); + onlineTransducerModelConfig.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx"); + onlineTransducerModelConfig.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx"); + + OnlineModelConfig onlineModelConfig = new OnlineModelConfig(); + onlineModelConfig.setTransducer(onlineTransducerModelConfig); + onlineModelConfig.setTokens(modelDir + "/tokens.txt"); + onlineModelConfig.setModelType("zipformer"); + onlineModelConfig.setDebug(true); + + OnlineRecognizerConfig config = new OnlineRecognizerConfig(); + config.setModelConfig(onlineModelConfig); + recognizer = new OnlineRecognizer(getAssets(), config); + + audioRecord.startRecording(); + startRecognition(); + } + + private void startRecognition() { + isRecording = true; + recordingThread = new Thread(this::processSamples); + recordingThread.start(); + } + + private void processSamples() { + OnlineStream stream = recognizer.createStream(""); + double interval = 0.1; + int bufferSize = (int) (interval * sampleRateInHz); + short[] buffer = new short[bufferSize]; + + while (isRecording) { + int ret = audioRecord != null ? 
audioRecord.read(buffer, 0, buffer.length) : -1; + if (ret > 0) { + float[] samples = new float[ret]; + for (int i = 0; i < ret; i++) { + samples[i] = buffer[i] / 32768.0f; + } + stream.acceptWaveform(samples, sampleRateInHz); + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + + boolean isEndpoint = recognizer.isEndpoint(stream); + String text = recognizer.getResult(stream).getText(); + if (isEndpoint) { + float[] tailPaddings = new float[(int) (0.8 * sampleRateInHz)]; + stream.acceptWaveform(tailPaddings, sampleRateInHz); + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + text = recognizer.getResult(stream).getText(); + } + + String textToDisplay = lastText; + + if (!TextUtils.isEmpty(text)) { + textToDisplay = TextUtils.isEmpty(text) ? idx + ": " + text : lastText + "\n" + idx + ": " + text; + } + + if (isEndpoint) { + recognizer.reset(stream); + if (!TextUtils.isEmpty(text)) { + lastText = lastText + "\n" + idx + ": " + text; + textToDisplay = lastText; + idx += 1; + } + appViewModel.setSpeechRecognitionResult(textToDisplay); + } + } + + } + stream.release(); + + } + + + @Override + public int onStartCommand(Intent intent, int flags, int startId) { + + return START_STICKY; + } + + @Override + public void onDestroy() { + super.onDestroy(); + audioRecord.stop(); + audioRecord.release(); + executor.shutdown(); + stopForeground(true); + } + + @Override + public IBinder onBind(Intent intent) { + return null; + } + + + @SuppressLint("ForegroundServiceType") + private void startForegroundService() { + String channelId = createNotificationChannel(); + + Notification notification = new NotificationCompat.Builder(this, channelId) + .setContentTitle("Foreground Service") + .setContentText("Running in the foreground") + .setSmallIcon(R.drawable.ic_bg_mic_24) + .build(); + + startForeground(1, notification); + } + + // 创建通知渠道 (针对 Android 8.0 及以上版本) + private String createNotificationChannel() { + if (Build.VERSION.SDK_INT >= 
Build.VERSION_CODES.O) { + String channelId = "speech_channel"; + String channelName = "Speech Channel"; + NotificationChannel channel = new NotificationChannel(channelId, channelName, NotificationManager.IMPORTANCE_LOW); + NotificationManager manager = getSystemService(NotificationManager.class); + if (manager != null) { + manager.createNotificationChannel(channel); + } + return channelId; + } else { + return ""; + } + } + + private void initializeSherpaDir(String assetDir, String internalDir) { + AssetManager assetManager = getAssets(); + File outDir = new File(getFilesDir(), internalDir); + + if (!outDir.exists()) { + outDir.mkdirs(); + } + + try { + String[] assets = assetManager.list(assetDir); + if (assets != null) { + for (String asset : assets) { + String assetPath = assetDir.isEmpty() ? asset : assetDir + "/" + asset; + File outFile = new File(outDir, asset); + if (Objects.requireNonNull(assetManager.list(assetPath)).length > 0) { + outFile.mkdirs(); + initializeSherpaDir(assetPath, internalDir + "/" + asset); // 递归复制子目录 + } else { + InputStream in = assetManager.open(assetPath); + OutputStream out = new FileOutputStream(outFile); + + byte[] buffer = new byte[1024]; + int read; + while ((read = in.read(buffer)) != -1) { + out.write(buffer, 0, read); + } + + in.close(); + out.flush(); + out.close(); + } + } + } + } catch (IOException e) { + Log.e("ModelCopy", "Failed to copy assets", e); + } + } +} diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 0000000000..2b068d1146 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml new file 
mode 100644 index 0000000000..5eb92eb316 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml @@ -0,0 +1,5 @@ + + + diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 0000000000..07d5da9cbf --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml new file mode 100644 index 0000000000..ae3ea627e9 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,18 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 0000000000..eca70cfe52 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 0000000000..eca70cfe52 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 100644 index 0000000000..c209e78ecd Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp 
differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..b2dfe3d1ba Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 0000000000..4f0f1d64e5 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..62b611da08 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 0000000000..948a3070fe Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..1b9a6956b3 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp new file mode 100644 index 0000000000..28d4b77f9f Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff 
--git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..9287f50836 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp new file mode 100644 index 0000000000..aa7d6427e6 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..9126ae37cb Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml new file mode 100644 index 0000000000..20276125c9 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml @@ -0,0 +1,16 @@ + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml new file mode 100644 index 0000000000..f8c6127d32 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml new file mode 100644 index 0000000000..31aa7267dd --- /dev/null +++ 
b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + SherpaOnnxJavaDemo + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml new file mode 100644 index 0000000000..d9f132e856 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml @@ -0,0 +1,16 @@ + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml new file mode 100644 index 0000000000..fa0f996d2c --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml @@ -0,0 +1,13 @@ + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml new file mode 100644 index 0000000000..9ee9997b0b --- /dev/null +++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml @@ -0,0 +1,19 @@ + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/build.gradle b/android/SherpaOnnxJavaDemo/build.gradle new file mode 100644 index 0000000000..5ae9a7b016 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/build.gradle @@ -0,0 +1,9 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. +plugins { + id 'com.android.application' version '7.2.2' apply false + id 'com.android.library' version '7.2.2' apply false +} + +task clean(type: Delete) { + delete rootProject.buildDir +} \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/gradle.properties b/android/SherpaOnnxJavaDemo/gradle.properties new file mode 100644 index 0000000000..dab7c28bff --- /dev/null +++ b/android/SherpaOnnxJavaDemo/gradle.properties @@ -0,0 +1,21 @@ +# Project-wide Gradle settings. +# IDE (e.g. 
Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true +# AndroidX package structure to make it clearer which packages are bundled with the +# Android operating system, and which are packaged with your app"s APK +# https://developer.android.com/topic/libraries/support-library/androidx-rn +android.useAndroidX=true +# Enables namespacing of each library's R class so that its R class includes only the +# resources declared in the library itself and none from the library's dependencies, +# thereby reducing the size of the R class for that library +android.nonTransitiveRClass=true \ No newline at end of file diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000..e708b1c023 Binary files /dev/null and b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar differ diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..489dbeed18 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Oct 22 10:59:18 CST 2024 +distributionBase=GRADLE_USER_HOME 
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip +distributionPath=wrapper/dists +zipStorePath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME diff --git a/android/SherpaOnnxJavaDemo/gradlew b/android/SherpaOnnxJavaDemo/gradlew new file mode 100644 index 0000000000..4f906e0c81 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. 
+MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/android/SherpaOnnxJavaDemo/gradlew.bat b/android/SherpaOnnxJavaDemo/gradlew.bat new file mode 100644 index 0000000000..107acd32c4 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/android/SherpaOnnxJavaDemo/settings.gradle b/android/SherpaOnnxJavaDemo/settings.gradle new file mode 100644 index 0000000000..e552eb6899 --- /dev/null +++ b/android/SherpaOnnxJavaDemo/settings.gradle @@ -0,0 +1,17 @@ +pluginManagement { + repositories { + gradlePluginPortal() + google() + mavenCentral() + } +} +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + maven { url 'https://jitpack.io' } + } +} +rootProject.name = "SherpaOnnxJavaDemo" +include ':app' diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index b17a6ea6c7..b42937ad37 100644 --- a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -151,24 +151,27 @@ class MainActivity : AppCompatActivity() { stream.acceptWaveform(samples, sampleRate = sampleRateInHz) while (kws.isReady(stream)) { kws.decode(stream) - } - val text = kws.getResult(stream).keyword + val text = kws.getResult(stream).keyword + + var textToDisplay = lastText - var textToDisplay = lastText + if (text.isNotBlank()) { + // Remember to reset the stream right after detecting a keyword - if (text.isNotBlank()) { - if (lastText.isBlank()) { - textToDisplay = "$idx: $text" - } else { - textToDisplay = "$idx: $text\n$lastText" + kws.reset(stream) + if (lastText.isBlank()) { + textToDisplay = "$idx: $text" + } else { + textToDisplay = "$idx: $text\n$lastText" + } + lastText = "$idx: $text\n$lastText" + idx += 1 } - lastText = "$idx: $text\n$lastText" - idx += 1 - } - runOnUiThread { - textView.text = textToDisplay + runOnUiThread { + textView.text = textToDisplay + } } } } diff --git 
a/android/SherpaOnnxSpeakerDiarization/.gitignore b/android/SherpaOnnxSpeakerDiarization/.gitignore new file mode 100644 index 0000000000..aa724b7707 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxSpeakerDiarization/app/.gitignore b/android/SherpaOnnxSpeakerDiarization/app/.gitignore new file mode 100644 index 0000000000..42afabfd2a --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts new file mode 100644 index 0000000000..7a390ba425 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts @@ -0,0 +1,71 @@ +plugins { + alias(libs.plugins.android.application) + alias(libs.plugins.jetbrains.kotlin.android) +} + +android { + namespace = "com.k2fsa.sherpa.onnx.speaker.diarization" + compileSdk = 34 + + defaultConfig { + applicationId = "com.k2fsa.sherpa.onnx.speaker.diarization" + minSdk = 21 + targetSdk = 34 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + vectorDrawables { + useSupportLibrary = true + } + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = "1.8" + } + buildFeatures { + compose = true + } + composeOptions { + kotlinCompilerExtensionVersion = "1.5.1" + } + packaging { + resources { + excludes += 
"/META-INF/{AL2.0,LGPL2.1}" + } + } +} + +dependencies { + + implementation(libs.androidx.core.ktx) + implementation(libs.androidx.lifecycle.runtime.ktx) + implementation(libs.androidx.activity.compose) + implementation(platform(libs.androidx.compose.bom)) + implementation(libs.androidx.ui) + implementation(libs.androidx.ui.graphics) + implementation(libs.androidx.ui.tooling.preview) + implementation(libs.androidx.material3) + implementation(libs.androidx.navigation.compose) + implementation(libs.androidx.documentfile) + testImplementation(libs.junit) + androidTestImplementation(libs.androidx.junit) + androidTestImplementation(libs.androidx.espresso.core) + androidTestImplementation(platform(libs.androidx.compose.bom)) + androidTestImplementation(libs.androidx.ui.test.junit4) + debugImplementation(libs.androidx.ui.tooling) + debugImplementation(libs.androidx.ui.test.manifest) +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro new file mode 100644 index 0000000000..481bb43481 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt new file mode 100644 index 0000000000..53d7af15fc --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. 
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx.speaker.diarization", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..d58f7e8d77 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt new file mode 100644 index 0000000000..0895cf52cf --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt @@ -0,0 +1,13 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import androidx.compose.ui.graphics.vector.ImageVector + +data class BarItem( + val title: String, + + // see https://www.composables.com/icons + // and + // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary + val image: ImageVector, + val route: String, +) \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt new file mode 100644 index 0000000000..7a25d49b9a --- /dev/null +++ 
b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt @@ -0,0 +1,132 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import android.os.Bundle +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.activity.enableEdgeToEdge +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.padding +import androidx.compose.material3.CenterAlignedTopAppBar +import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.Icon +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.NavigationBar +import androidx.compose.material3.NavigationBarItem +import androidx.compose.material3.Scaffold +import androidx.compose.material3.Surface +import androidx.compose.material3.Text +import androidx.compose.material3.TopAppBarDefaults +import androidx.compose.runtime.Composable +import androidx.compose.runtime.getValue +import androidx.compose.ui.Modifier +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.tooling.preview.Preview +import androidx.navigation.NavGraph.Companion.findStartDestination +import androidx.navigation.NavHostController +import androidx.navigation.compose.NavHost +import androidx.navigation.compose.composable +import androidx.navigation.compose.currentBackStackEntryAsState +import androidx.navigation.compose.rememberNavController +import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen +import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen +import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme + +const val TAG = "sherpa-onnx-sd" + +class MainActivity : ComponentActivity() { + override fun onCreate(savedInstanceState: Bundle?) 
{ + super.onCreate(savedInstanceState) + enableEdgeToEdge() + setContent { + SherpaOnnxSpeakerDiarizationTheme { + // A surface container using the 'background' color from the theme + Surface( + modifier = Modifier.fillMaxSize(), + color = MaterialTheme.colorScheme.background + ) { + MainScreen() + } + } + } + SpeakerDiarizationObject.initSpeakerDiarization(this.assets) + } +} + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun MainScreen(modifier: Modifier = Modifier) { + val navController = rememberNavController() + Scaffold( + topBar = { + CenterAlignedTopAppBar( + colors = TopAppBarDefaults.topAppBarColors( + containerColor = MaterialTheme.colorScheme.primaryContainer, + titleContentColor = MaterialTheme.colorScheme.primary, + ), + title = { + Text( + "Next-gen Kaldi: Speaker Diarization", + fontWeight = FontWeight.Bold, + ) + }, + ) + }, + content = { padding -> + Column(Modifier.padding(padding)) { + NavigationHost(navController = navController) + + } + }, + bottomBar = { + BottomNavigationBar(navController = navController) + } + ) +} + +@Composable +fun NavigationHost(navController: NavHostController) { + NavHost(navController = navController, startDestination = NavRoutes.Home.route) { + composable(NavRoutes.Home.route) { + HomeScreen() + } + + composable(NavRoutes.Help.route) { + HelpScreen() + } + } +} + +@Composable +fun BottomNavigationBar(navController: NavHostController) { + NavigationBar { + val backStackEntry by navController.currentBackStackEntryAsState() + val currentRoute = backStackEntry?.destination?.route + + NavBarItems.BarItems.forEach { navItem -> + NavigationBarItem(selected = currentRoute == navItem.route, + onClick = { + navController.navigate(navItem.route) { + popUpTo(navController.graph.findStartDestination().id) { + saveState = true + } + launchSingleTop = true + restoreState = true + } + }, + icon = { + Icon(imageVector = navItem.image, contentDescription = navItem.title) + }, label = { + Text(text = navItem.title) + }) + } 
+ } +} + +@Preview(showBackground = true) +@Composable +fun MainScreenPreview() { + SherpaOnnxSpeakerDiarizationTheme { + MainScreen() + } +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt new file mode 100644 index 0000000000..65c737f971 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt @@ -0,0 +1,20 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.Home +import androidx.compose.material.icons.filled.Info + +object NavBarItems { + val BarItems = listOf( + BarItem( + title = "Home", + image = Icons.Filled.Home, + route = "home", + ), + BarItem( + title = "Help", + image = Icons.Filled.Info, + route = "help", + ), + ) +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt new file mode 100644 index 0000000000..2e1ae90b51 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt @@ -0,0 +1,6 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +sealed class NavRoutes(val route: String) { + object Home : NavRoutes("home") + object Help : NavRoutes("help") +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt new file mode 120000 index 
0000000000..459cc22ccd --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt new file mode 100644 index 0000000000..940a2b6434 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt @@ -0,0 +1,137 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization.screens + +import android.content.Context +import android.media.AudioFormat +import android.media.MediaCodec +import android.media.MediaExtractor +import android.media.MediaFormat +import android.net.Uri + +data class WaveData( + val sampleRate: Int? = null, + val samples: FloatArray? = null, + val msg: String? 
= null +) + +// Decodes the first audio track of `uri` into float samples in [-1, 1). It supports only 16-bit encoded wave files +// +// References +// - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9 +// - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt +fun readUri(context: Context, uri: Uri): WaveData { + val extractor = MediaExtractor() + extractor.setDataSource(context, uri, null) + + val samplesList: MutableList<FloatArray> = ArrayList() + + for (i in 0 until extractor.trackCount) { + val format = extractor.getTrackFormat(i) + val mime = format.getString(MediaFormat.KEY_MIME) + if (mime?.startsWith("audio/") == true) { + extractor.selectTrack(i) + + var encoding: Int = -1 + try { + encoding = format.getInteger(MediaFormat.KEY_PCM_ENCODING) + } catch (_: Exception) { + } + + if (encoding != AudioFormat.ENCODING_PCM_16BIT) { + return WaveData(msg = "We support only 16-bit encoded wave files") + } + + val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE) + val decoder = MediaCodec.createDecoderByType(mime) + decoder.configure(format, null, null, 0) + decoder.start() + + val inputBuffers = decoder.inputBuffers + var outputBuffers = decoder.outputBuffers + + val info = MediaCodec.BufferInfo() + var eof = false + + var outputBufferIndex = -1 + + while (true) { + if (!eof) { + val inputBufferIndex = decoder.dequeueInputBuffer(10000) + if (inputBufferIndex >= 0) { // 0 is a valid buffer index; a negative value means no input buffer is free yet + val size = extractor.readSampleData(inputBuffers[inputBufferIndex], 0) + if (size < 0) { + decoder.queueInputBuffer( + inputBufferIndex, + 0, + 0, + 0, + MediaCodec.BUFFER_FLAG_END_OF_STREAM + ) + eof = true + } else { + decoder.queueInputBuffer( + inputBufferIndex, + 0, + size, + extractor.sampleTime, + 0 + ) + extractor.advance() + } + } + } // if (!eof) + + if (outputBufferIndex >= 0) { + outputBuffers[outputBufferIndex].position(0) + } + + outputBufferIndex = decoder.dequeueOutputBuffer(info, 10000) + if (outputBufferIndex >= 0) { + if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) { // finish only when the decoder signals end of stream + decoder.stop() +
decoder.release() + + var k = 0 + for (s in samplesList) { + k += s.size + } + if (k == 0) { + return WaveData(msg = "Failed to read selected file") + } + + val ans = FloatArray(k) + k = 0 + for (s in samplesList) { + s.copyInto(ans, k) + k += s.size + } + + return WaveData(sampleRate = sampleRate, samples = ans) + } + + val buffer = outputBuffers[outputBufferIndex] + val chunk = ByteArray(info.size) + buffer[chunk] + buffer.clear() + + val numSamples = info.size / 2 + + val samples = FloatArray(numSamples) + for (k in 0 until numSamples) { + // little endian: the low byte is unsigned, the high byte carries the sign + val s = (chunk[2 * k].toInt() and 0xff) + (chunk[2 * k + 1] * 256.0f) + + samples[k] = s / 32768.0f + } + samplesList.add(samples) + + decoder.releaseOutputBuffer(outputBufferIndex, false) + } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) { + outputBuffers = decoder.outputBuffers + } + } + } + } + + extractor.release() + return WaveData(msg = "not an audio file") +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt new file mode 100644 index 0000000000..9df6bd5616 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt @@ -0,0 +1,67 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import android.content.res.AssetManager +import android.util.Log +import com.k2fsa.sherpa.onnx.FastClusteringConfig +import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization +import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig +import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig +import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig +import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig + +// Please download +//
https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +// then unzip it, rename model.onnx to segmentation.onnx, and mv +// segmentation.onnx to the assets folder +val segmentationModel = "segmentation.onnx" + +// please download it from +// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx +// and rename it to embedding.onnx +// and move it to the assets folder +val embeddingModel = "embedding.onnx" + +// in the end, your assets folder should look like below +/* +(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd +/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets +(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh +total 89048 +-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 embedding.onnx +-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx + */ + +object SpeakerDiarizationObject { + var _sd: OfflineSpeakerDiarization? = null + val sd: OfflineSpeakerDiarization + get() { + return _sd!! + } + + fun initSpeakerDiarization(assetManager: AssetManager? 
= null) { + synchronized(this) { + if (_sd != null) { + return + } + Log.i(TAG, "Initializing sherpa-onnx speaker diarization") + + val config = OfflineSpeakerDiarizationConfig( + segmentation = OfflineSpeakerSegmentationModelConfig( + pyannote = OfflineSpeakerSegmentationPyannoteModelConfig( + segmentationModel + ), + debug = true, + ), + embedding = SpeakerEmbeddingExtractorConfig( + model = embeddingModel, + debug = true, + numThreads = 2, + ), + clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f), + minDurationOn = 0.2f, + minDurationOff = 0.5f, + ) + _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config) + } + } +} diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt new file mode 120000 index 0000000000..9bab8fe88a --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt new file mode 100644 index 0000000000..b3640b9e97 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt @@ -0,0 +1,38 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization.screens + +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Spacer +import androidx.compose.foundation.layout.fillMaxSize +import 
androidx.compose.foundation.layout.height +import androidx.compose.foundation.layout.padding +import androidx.compose.material3.Text +import androidx.compose.runtime.Composable +import androidx.compose.ui.Modifier +import androidx.compose.ui.unit.dp +import androidx.compose.ui.unit.sp + +@Composable +fun HelpScreen() { + Box(modifier = Modifier.fillMaxSize()) { + Column( + modifier = Modifier.padding(8.dp) + ) { + Text( + "This app accepts only 16kHz 16-bit 1-channel *.wav files. " + + "It has two arguments: Number of speakers and clustering threshold. " + + "If you know the actual number of speakers in the file, please set it. " + + "Otherwise, please set it to 0. In that case, you have to set the threshold. " + + "A larger threshold leads to fewer segmented speakers." + ) + Spacer(modifier = Modifier.height(5.dp)) + Text("The speaker segmentation model is from " + + "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), "+ + "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)") + Spacer(modifier = Modifier.height(5.dp)) + Text("Please see http://github.com/k2-fsa/sherpa-onnx ") + Spacer(modifier = Modifier.height(5.dp)) + Text("Everything is open-sourced!", fontSize = 20.sp) + } + } +} diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt new file mode 100644 index 0000000000..a5a9cd31c9 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt @@ -0,0 +1,210 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization.screens + +import android.util.Log +import androidx.activity.compose.rememberLauncherForActivityResult +import androidx.activity.result.contract.ActivityResultContracts +import androidx.compose.foundation.layout.Arrangement +import 
androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.Spacer +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size +import androidx.compose.foundation.rememberScrollState +import androidx.compose.foundation.verticalScroll +import androidx.compose.material3.Button +import androidx.compose.material3.OutlinedTextField +import androidx.compose.material3.Text +import androidx.compose.runtime.Composable +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.platform.LocalClipboardManager +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.text.AnnotatedString +import androidx.compose.ui.unit.dp +import androidx.compose.ui.unit.sp +import androidx.documentfile.provider.DocumentFile +import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject +import com.k2fsa.sherpa.onnx.speaker.diarization.TAG +import kotlin.concurrent.thread + + +private var samples: FloatArray? 
= null + +@Composable +fun HomeScreen() { + val context = LocalContext.current + + var sampleRate: Int + var filename by remember { mutableStateOf("") } + var status by remember { mutableStateOf("") } + var progress by remember { mutableStateOf("") } + val clipboardManager = LocalClipboardManager.current + var done by remember { mutableStateOf(false) } + var fileIsOk by remember { mutableStateOf(false) } + var started by remember { mutableStateOf(false) } + var numSpeakers by remember { mutableStateOf(0) } + var threshold by remember { mutableStateOf(0.5f) } + + + val callback = here@{ numProcessedChunks: Int, numTotalChunks: Int, arg: Long -> + Int + val percent = 100.0 * numProcessedChunks / numTotalChunks + progress = "%.2f%%".format(percent) + Log.i(TAG, progress) + return@here 0 + } + + val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) { + it?.let { + val documentFile = DocumentFile.fromSingleUri(context, it) + filename = documentFile?.name ?: "" + + progress = "" + done = false + fileIsOk = false + + if (filename.isNotEmpty()) { + val data = readUri(context, it) + Log.i(TAG, "sample rate: ${data.sampleRate}") + Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}") + if (data.msg != null) { + Log.i(TAG, "failed to read $filename") + status = data.msg + } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) { + status = + "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}" + } else { + samples = data.samples!! 
+ fileIsOk = true + } + } + } + } + + Column( + modifier = Modifier.padding(10.dp), + verticalArrangement = Arrangement.Top, + ) { + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceEvenly, + verticalAlignment = Alignment.CenterVertically + ) { + + Button(onClick = { + launcher.launch(arrayOf("audio/*")) + }) { + Text("Select a .wav file") + } + + Button(enabled = fileIsOk && !started, + onClick = { + Log.i(TAG, "started") + Log.i(TAG, "num samples: ${samples?.size}") + started = true + progress = "" + + val config = SpeakerDiarizationObject.sd.config + config.clustering.numClusters = numSpeakers + config.clustering.threshold = threshold + + SpeakerDiarizationObject.sd.setConfig(config) + + thread(true) { + done = false + status = "Started! Please wait" + val segments = SpeakerDiarizationObject.sd.processWithCallback( + samples!!, + callback = callback, + ) + done = true + started = false + status = "" + for (s in segments) { + val start = "%.2f".format(s.start) + val end = "%.2f".format(s.end) + val speaker = "speaker_%02d".format(s.speaker) + status += "$start -- $end $speaker\n" + Log.i(TAG, "$start -- $end $speaker") + } + + Log.i(TAG, status) + } + }) { + Text("Start") + } + if (progress.isNotEmpty()) { + Text(progress, fontSize = 25.sp) + } + } + + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceEvenly, + verticalAlignment = Alignment.CenterVertically + ) { + OutlinedTextField( + value = numSpeakers.toString(), + onValueChange = { + if (it.isEmpty() || it.isBlank()) { + numSpeakers = 0 + } else { + numSpeakers = it.toIntOrNull() ?: 0 + } + }, + label = { + Text("Number of Speakers") + }, + ) + } + + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceEvenly, + verticalAlignment = Alignment.CenterVertically + ) { + OutlinedTextField( + value = threshold.toString(), + onValueChange = { + if (it.isEmpty() || it.isBlank()) { + threshold = 0.5f + } else { + 
threshold = it.toFloatOrNull() ?: 0.5f + } + }, + label = { + Text("Clustering threshold") + }, + ) + } + + if (filename.isNotEmpty()) { + Text(text = "Selected $filename") + Spacer(Modifier.size(20.dp)) + } + + if (done) { + Button(onClick = { + clipboardManager.setText(AnnotatedString(status)) + progress = "Copied!" + }) { + Text("Copy result") + } + Spacer(Modifier.size(20.dp)) + } + + if (status.isNotEmpty()) { + Text( + status, + modifier = Modifier.verticalScroll(rememberScrollState()), + ) + } + + + } +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt new file mode 100644 index 0000000000..a96515d3da --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt @@ -0,0 +1,11 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme + +import androidx.compose.ui.graphics.Color + +val Purple80 = Color(0xFFD0BCFF) +val PurpleGrey80 = Color(0xFFCCC2DC) +val Pink80 = Color(0xFFEFB8C8) + +val Purple40 = Color(0xFF6650a4) +val PurpleGrey40 = Color(0xFF625b71) +val Pink40 = Color(0xFF7D5260) \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt new file mode 100644 index 0000000000..5dbbe7e59e --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt @@ -0,0 +1,58 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme + +import android.app.Activity +import android.os.Build +import androidx.compose.foundation.isSystemInDarkTheme +import 
androidx.compose.material3.MaterialTheme +import androidx.compose.material3.darkColorScheme +import androidx.compose.material3.dynamicDarkColorScheme +import androidx.compose.material3.dynamicLightColorScheme +import androidx.compose.material3.lightColorScheme +import androidx.compose.runtime.Composable +import androidx.compose.ui.platform.LocalContext + +private val DarkColorScheme = darkColorScheme( + primary = Purple80, + secondary = PurpleGrey80, + tertiary = Pink80 +) + +private val LightColorScheme = lightColorScheme( + primary = Purple40, + secondary = PurpleGrey40, + tertiary = Pink40 + + /* Other default colors to override + background = Color(0xFFFFFBFE), + surface = Color(0xFFFFFBFE), + onPrimary = Color.White, + onSecondary = Color.White, + onTertiary = Color.White, + onBackground = Color(0xFF1C1B1F), + onSurface = Color(0xFF1C1B1F), + */ +) + +@Composable +fun SherpaOnnxSpeakerDiarizationTheme( + darkTheme: Boolean = isSystemInDarkTheme(), + // Dynamic color is available on Android 12+ + dynamicColor: Boolean = true, + content: @Composable () -> Unit +) { + val colorScheme = when { + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { + val context = LocalContext.current + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) + } + + darkTheme -> DarkColorScheme + else -> LightColorScheme + } + + MaterialTheme( + colorScheme = colorScheme, + typography = Typography, + content = content + ) +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt new file mode 100644 index 0000000000..39a81b9418 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt @@ -0,0 +1,34 @@ +package 
com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme + +import androidx.compose.material3.Typography +import androidx.compose.ui.text.TextStyle +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.unit.sp + +// Set of Material typography styles to start with +val Typography = Typography( + bodyLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 16.sp, + lineHeight = 24.sp, + letterSpacing = 0.5.sp + ) + /* Other default text styles to override + titleLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 22.sp, + lineHeight = 28.sp, + letterSpacing = 0.sp + ), + labelSmall = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Medium, + fontSize = 11.sp, + lineHeight = 16.sp, + letterSpacing = 0.5.sp + ) + */ +) \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml 
new file mode 100644 index 0000000000..2b068d1146 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 0000000000..07d5da9cbf --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 0000000000..6f3b755bf5 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 0000000000..6f3b755bf5 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 100644 index 0000000000..c209e78ecd Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp 
b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..b2dfe3d1ba Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 0000000000..4f0f1d64e5 Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..62b611da08 Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 0000000000..948a3070fe Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..1b9a6956b3 Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp new file mode 100644 index 0000000000..28d4b77f9f Binary files /dev/null and 
b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..9287f50836 Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp new file mode 100644 index 0000000000..aa7d6427e6 Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000..9126ae37cb Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml new file mode 100644 index 0000000000..f8c6127d32 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml new file mode 100644 index 0000000000..05f2df0901 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + SherpaOnnxSpeakerDiarization + \ No 
newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml new file mode 100644 index 0000000000..34d1d96ed3 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + + + + + +

+ Next-gen Kaldi + WebAssembly
+ Speaker Diarization
with sherpa-onnx +

+
+ Loading model ... ... +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + + + diff --git a/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js new file mode 100644 index 0000000000..7410134808 --- /dev/null +++ b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js @@ -0,0 +1,299 @@ + +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('config' in config) { + freeConfig(config.config, Module) + } + + if ('segmentation' in config) { + freeConfig(config.segmentation, Module) + } + + if ('embedding' in config) { + freeConfig(config.embedding, Module) + } + + if ('clustering' in config) { + freeConfig(config.clustering, Module) + } + + Module._free(config.ptr); +} + +function initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig( + config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const n = modelLen; + const buffer = Module._malloc(n); + + const len = 1 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + + return { + buffer: buffer, ptr: ptr, len: len, + } +} + +function initSherpaOnnxOfflineSpeakerSegmentationModelConfig(config, Module) { + if (!('pyannote' in config)) { + config.pyannote = { + model: '', + }; + } + + const pyannote = initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig( + config.pyannote, Module); + + const len = pyannote.len + 3 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(pyannote.ptr, pyannote.len, ptr + offset); + offset += pyannote.len; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 0, 'i32'); + offset += 4; + + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(providerLen); + 
Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen); + Module.setValue(ptr + offset, buffer, 'i8*'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + config: pyannote, + }; +} + +function initSherpaOnnxSpeakerEmbeddingExtractorConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const n = modelLen + providerLen; + const buffer = Module._malloc(n); + + const len = 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); + offset += providerLen; + + offset = 0 + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 0, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + modelLen, 'i8*'); + offset += 4; + + return { + buffer: buffer, + ptr: ptr, + len: len, + }; +} + +function initSherpaOnnxFastClusteringConfig(config, Module) { + const len = 2 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.setValue(ptr + offset, config.numClusters || -1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); + offset += 4; + + return { + ptr: ptr, + len: len, + }; +} + +function initSherpaOnnxOfflineSpeakerDiarizationConfig(config, Module) { + if (!('segmentation' in config)) { + config.segmentation = { + pyannote: {model: ''}, + numThreads: 1, + debug: 0, + provider: 'cpu', + }; + } + + if (!('embedding' in config)) { + config.embedding = { + model: '', + numThreads: 1, + debug: 0, + provider: 'cpu', + }; + } + + if (!('clustering' in config)) { + config.clustering = { + numClusters: -1, + threshold: 0.5, + }; + } + + const segmentation = 
initSherpaOnnxOfflineSpeakerSegmentationModelConfig( + config.segmentation, Module); + + const embedding = + initSherpaOnnxSpeakerEmbeddingExtractorConfig(config.embedding, Module); + + const clustering = + initSherpaOnnxFastClusteringConfig(config.clustering, Module); + + const len = segmentation.len + embedding.len + clustering.len + 2 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(segmentation.ptr, segmentation.len, ptr + offset); + offset += segmentation.len; + + Module._CopyHeap(embedding.ptr, embedding.len, ptr + offset); + offset += embedding.len; + + Module._CopyHeap(clustering.ptr, clustering.len, ptr + offset); + offset += clustering.len; + + Module.setValue(ptr + offset, config.minDurationOn || 0.2, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minDurationOff || 0.5, 'float'); + offset += 4; + + return { + ptr: ptr, len: len, segmentation: segmentation, embedding: embedding, + clustering: clustering, + } +} + +class OfflineSpeakerDiarization { + constructor(configObj, Module) { + const config = + initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, Module) + // Module._MyPrint(config.ptr); + + const handle = + Module._SherpaOnnxCreateOfflineSpeakerDiarization(config.ptr); + + freeConfig(config, Module); + + this.handle = handle; + this.sampleRate = + Module._SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(this.handle); + this.Module = Module + + this.config = configObj; + } + + free() { + this.Module._SherpaOnnxDestroyOfflineSpeakerDiarization(this.handle); + this.handle = 0 + } + + setConfig(configObj) { + if (!('clustering' in configObj)) { + return; + } + + const config = + initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, this.Module); + + this.Module._SherpaOnnxOfflineSpeakerDiarizationSetConfig( + this.handle, config.ptr); + + freeConfig(config, Module); + + this.config.clustering = configObj.clustering; + } + + process(samples) { + const pointer = + this.Module._malloc(samples.length 
* samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + let r = this.Module._SherpaOnnxOfflineSpeakerDiarizationProcess( + this.handle, pointer, samples.length); + this.Module._free(pointer); + + let numSegments = + this.Module._SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r); + + let segments = + this.Module._SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime( + r); + + let ans = []; + + let sizeOfSegment = 3 * 4; + for (let i = 0; i < numSegments; ++i) { + let p = segments + i * sizeOfSegment + + let start = this.Module.HEAPF32[p / 4 + 0]; + let end = this.Module.HEAPF32[p / 4 + 1]; + let speaker = this.Module.HEAP32[p / 4 + 2]; + + ans.push({start: start, end: end, speaker: speaker}); + } + + this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments); + this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r); + + return ans; + } +} + +function createOfflineSpeakerDiarization(Module, myConfig) { + let config = { + segmentation: { + pyannote: {model: './segmentation.onnx'}, + debug: 1, + }, + embedding: { + model: './embedding.onnx', + debug: 1, + }, + clustering: {numClusters: -1, threshold: 0.5}, + minDurationOn: 0.3, + minDurationOff: 0.5, + }; + + if (myConfig) { + config = myConfig; + } + + return new OfflineSpeakerDiarization(config, Module); +} + +if (typeof process == 'object' && typeof process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + createOfflineSpeakerDiarization, + }; +} diff --git a/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc b/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc new file mode 100644 index 0000000000..6e83f61d8b --- /dev/null +++ b/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc @@ -0,0 +1,63 @@ +// wasm/sherpa-onnx-wasm-main-speaker-diarization.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include +#include + 
+#include "sherpa-onnx/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +static_assert(sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) == + 1 * 4, + ""); + +static_assert( + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) + 3 * 4, + ""); + +static_assert(sizeof(SherpaOnnxFastClusteringConfig) == 2 * 4, ""); + +static_assert(sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) == 4 * 4, ""); + +static_assert(sizeof(SherpaOnnxOfflineSpeakerDiarizationConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) + + sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) + + sizeof(SherpaOnnxFastClusteringConfig) + 2 * 4, + ""); + +void MyPrint(const SherpaOnnxOfflineSpeakerDiarizationConfig *sd_config) { + const auto &segmentation = sd_config->segmentation; + const auto &embedding = sd_config->embedding; + const auto &clustering = sd_config->clustering; + + fprintf(stdout, "----------segmentation config----------\n"); + fprintf(stdout, "pyannote model: %s\n", segmentation.pyannote.model); + fprintf(stdout, "num threads: %d\n", segmentation.num_threads); + fprintf(stdout, "debug: %d\n", segmentation.debug); + fprintf(stdout, "provider: %s\n", segmentation.provider); + + fprintf(stdout, "----------embedding config----------\n"); + fprintf(stdout, "model: %s\n", embedding.model); + fprintf(stdout, "num threads: %d\n", embedding.num_threads); + fprintf(stdout, "debug: %d\n", embedding.debug); + fprintf(stdout, "provider: %s\n", embedding.provider); + + fprintf(stdout, "----------clustering config----------\n"); + fprintf(stdout, "num_clusters: %d\n", clustering.num_clusters); + fprintf(stdout, "threshold: %.3f\n", clustering.threshold); + + fprintf(stdout, "min_duration_on: %.3f\n", sd_config->min_duration_on); + fprintf(stdout, "min_duration_off: %.3f\n", sd_config->min_duration_off); 
+} + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} diff --git a/wasm/tts/sherpa-onnx-tts.js b/wasm/tts/sherpa-onnx-tts.js index 4d68b854f4..e08c9e8d81 100644 --- a/wasm/tts/sherpa-onnx-tts.js +++ b/wasm/tts/sherpa-onnx-tts.js @@ -8,6 +8,14 @@ function freeConfig(config, Module) { freeConfig(config.config, Module) } + if ('matcha' in config) { + freeConfig(config.matcha, Module) + } + + if ('kokoro' in config) { + freeConfig(config.kokoro, Module) + } + Module._free(config.ptr); } @@ -66,11 +74,181 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { } } +function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) { + const acousticModelLen = Module.lengthBytesUTF8(config.acousticModel) + 1; + const vocoderLen = Module.lengthBytesUTF8(config.vocoder) + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + + const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen + + dataDirLen + dictDirLen; + + const buffer = Module._malloc(n); + + const len = 8 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8( + config.acousticModel || '', buffer + offset, acousticModelLen); + offset += acousticModelLen; + + Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen); + offset += vocoderLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + offset = 0; + 
Module.setValue(ptr, buffer + offset, 'i8*'); + offset += acousticModelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += vocoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += lexiconLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 16, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 20, config.noiseScale || 0.667, 'float'); + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 28, buffer + offset, 'i8*'); + offset += dictDirLen; + + return { + buffer: buffer, ptr: ptr, len: len, + } +} + +function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model) + 1; + const voicesLen = Module.lengthBytesUTF8(config.voices) + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + + const n = + modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen; + + const buffer = Module._malloc(n); + + const len = 7 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.voices || '', buffer + offset, voicesLen); + offset += voicesLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset 
+= modelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += voicesLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); + + Module.setValue(ptr + 20, buffer + offset, 'i8*'); + offset += dictDirLen; + + Module.setValue(ptr + 24, buffer + offset, 'i8*'); + offset += lexiconLen; + + return { + buffer: buffer, ptr: ptr, len: len, + } +} + function initSherpaOnnxOfflineTtsModelConfig(config, Module) { + if (!('offlineTtsVitsModelConfig' in config)) { + config.offlineTtsVitsModelConfig = { + model: '', + lexicon: '', + tokens: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + dataDir: '', + dictDir: '', + }; + } + + if (!('offlineTtsMatchaModelConfig' in config)) { + config.offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + noiseScale: 0.667, + lengthScale: 1.0, + dataDir: '', + dictDir: '', + }; + } + + if (!('offlineTtsKokoroModelConfig' in config)) { + config.offlineTtsKokoroModelConfig = { + model: '', + voices: '', + tokens: '', + lengthScale: 1.0, + dataDir: '', + dictDir: '', + lexicon: '', + }; + } + + const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( config.offlineTtsVitsModelConfig, Module); - const len = vitsModelConfig.len + 3 * 4; + const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig( + config.offlineTtsMatchaModelConfig, Module); + + const kokoroModelConfig = initSherpaOnnxOfflineTtsKokoroModelConfig( + config.offlineTtsKokoroModelConfig, Module); + + const len = vitsModelConfig.len + matchaModelConfig.len + + kokoroModelConfig.len + 3 * 4; + const ptr = Module._malloc(len); let offset = 0; @@ -87,9 +265,17 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { const buffer = Module._malloc(providerLen); Module.stringToUTF8(config.provider, buffer, providerLen); 
Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module._CopyHeap(matchaModelConfig.ptr, matchaModelConfig.len, ptr + offset); + offset += matchaModelConfig.len; + + Module._CopyHeap(kokoroModelConfig.ptr, kokoroModelConfig.len, ptr + offset); + offset += kokoroModelConfig.len; return { buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, + matcha: matchaModelConfig, kokoro: kokoroModelConfig, } } @@ -195,12 +381,37 @@ function createOfflineTts(Module, myConfig) { noiseScaleW: 0.8, lengthScale: 1.0, }; + + const offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }; + const offlineTtsModelConfig = { offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, numThreads: 1, debug: 1, provider: 'cpu', }; + let offlineTtsConfig = { offlineTtsModelConfig: offlineTtsModelConfig, ruleFsts: '', diff --git a/wasm/tts/sherpa-onnx-wasm-main-tts.cc b/wasm/tts/sherpa-onnx-wasm-main-tts.cc index 872a1c853a..07bf4d429c 100644 --- a/wasm/tts/sherpa-onnx-wasm-main-tts.cc +++ b/wasm/tts/sherpa-onnx-wasm-main-tts.cc @@ -14,8 +14,12 @@ extern "C" { static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == - sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, + sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + + sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + + sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) + 3 * 4, ""); 
static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4, @@ -24,6 +28,8 @@ static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { auto tts_model_config = &tts_config->model; auto vits_model_config = &tts_model_config->vits; + auto matcha_model_config = &tts_model_config->matcha; + auto kokoro = &tts_model_config->kokoro; fprintf(stdout, "----------vits model config----------\n"); fprintf(stdout, "model: %s\n", vits_model_config->model); fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); @@ -34,6 +40,25 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir); + fprintf(stdout, "----------matcha model config----------\n"); + fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model); + fprintf(stdout, "vocoder: %s\n", matcha_model_config->vocoder); + fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale); + fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale); + fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir); + + fprintf(stdout, "----------kokoro model config----------\n"); + fprintf(stdout, "model: %s\n", kokoro->model); + fprintf(stdout, "voices: %s\n", kokoro->voices); + fprintf(stdout, "tokens: %s\n", kokoro->tokens); + fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); + fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); + fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir); + fprintf(stdout, "lexicon: %s\n", kokoro->lexicon); + fprintf(stdout, "----------tts model config----------\n"); fprintf(stdout, "num threads: %d\n", 
tts_model_config->num_threads); fprintf(stdout, "debug: %d\n", tts_model_config->debug); diff --git a/wasm/vad-asr/app-vad-asr.js b/wasm/vad-asr/app-vad-asr.js index 5cb172e644..68b7b7da1f 100644 --- a/wasm/vad-asr/app-vad-asr.js +++ b/wasm/vad-asr/app-vad-asr.js @@ -111,6 +111,13 @@ function initOfflineRecognizer() { }; } else if (fileExists('telespeech.onnx')) { config.modelConfig.telespeechCtc = './telespeech.onnx'; + } else if (fileExists('moonshine-preprocessor.onnx')) { + config.modelConfig.moonshine = { + preprocessor: './moonshine-preprocessor.onnx', + encoder: './moonshine-encoder.onnx', + uncachedDecoder: './moonshine-uncached-decoder.onnx', + cachedDecoder: './moonshine-cached-decoder.onnx' + }; } else { console.log('Please specify a model.'); alert('Please specify a model.');